{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Salary Survey"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import db\n",
    "import inspect\n",
    "import matplotlib.pyplot as plt\n",
    "import scipy.stats as stats\n",
    "import numpy as np\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load every row of table _201904 whose monthly_salary is strictly between\n",
    "# 0 and 150000; the filter is applied by the database, not by pandas.\n",
    "salary_query = \"select * from _201904 where monthly_salary>0 and monthly_salary<150000\"\n",
    "data_original = pd.read_sql(sql=salary_query, con=db.get_conn())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(139688, 159)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_original.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "error_job_ids=['104660258','104142922','108434795','101357291','106253516','110368302','111391233','108665401','109277048'\n",
    "                  ,'73857191','108584955','102824950','102824949','111391233','110884556']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the rows whose job_id is not on the exclusion list.\n",
    "data = data_original.loc[\n",
    "    ~data_original['job_id'].isin(error_job_ids)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(139675, 159)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns removed from the working frame before the analysis.\n",
    "unused_columns = [\n",
    "    'publish_date',\n",
    "    'published_on_weekend',\n",
    "    'title',\n",
    "    'company_title',\n",
    "    'company_description',\n",
    "    'job_description',\n",
    "    'job_id',\n",
    "]\n",
    "# Rebind instead of deleting in place: `del data[col]` raises KeyError when\n",
    "# the cell is run a second time. errors='ignore' turns columns that are\n",
    "# already gone into a no-op, so the cell can be re-run safely.\n",
    "data = data.drop(columns=unused_columns, errors='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Salary Distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1fcc8853860>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAD8CAYAAACLrvgBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAGG5JREFUeJzt3X+QZXWZ3/H3J8zioisCsnZRMySD68QENYnQhSRmra5lFwbWOCTRFBQVplyqptbgRhO2VoxVYcsfVZoNSxZK2ZqVCYNFRJfVmqkEg1NIx9oqQcAfDIg4LRJpGWF1EB2NmjFP/rjfrlyb293n3jtzb298v6pu9TnP+Z5zn3v6Tn/6/Lg9qSokSerib0y7AUnSXx+GhiSpM0NDktSZoSFJ6szQkCR1ZmhIkjozNCRJnRkakqTODA1JUmcbpt3A0XbqqafW5s2bR1r3hz/8IS94wQuObkNHgX0Nx76GY1/DWa99wXi9PfDAA9+pql9dc2BV/X/1OPvss2tUd99998jrHkv2NRz7Go59DWe99lU1Xm/A/dXhZ6ynpyRJnRkakqTODA1JUmeGhiSpszVDI8muJE8neWjAst9PUklObfNJcn2ShSQPJjmrb+z2JAfaY3tf/ewk+9s61ydJq5+SZF8bvy/JyUfnJUuSRtXlSONmYOvyYpLTgd8CvtlXvhDY0h47gBvb2FOAa4DXAOcA1/SFwI1t7NJ6S891NXBXVW0B7mrzkqQpWjM0quqzwKEBi64D/gDo/6//tgG3tDu47gFOSnIacAGwr6oOVdUzwD5ga1t2YlV9rt3ydQtwcd+2drfp3X11SdKUjHRNI8kbgG9V1ZeXLdoIPNE3v9hqq9UXB9QBZqrqIED7+pJRepUkHT1DfyI8yfOBdwHnD1o8oFYj1IftaQe9U1zMzMwwPz8/7CYAOHz48MjrHkv2NRz7Go59DWe99gWT6W2UPyPya8AZwJfbNetNwBeSnEPvSOH0vrGbgCdbfW5Zfb7VNw0YD/BUktOq6mA7jfX0Sg1V1U5gJ8Ds7GzNzc2tNHRVN9y6h2v/8ocjrTuux9//2ysum5+fZ9TXdCzZ13Dsazj2NbxJ9Db06amq2l9VL6mqzVW1md4P/rOq6tvAXuDydhfVucCz7dTSncD5SU5uF8DPB+5sy36Q5Nx219TlwJ72VHuBpbustvfVJUlT0uWW248CnwNenmQxyRWrDL8DeAxYAP4M+FcAVXUIeA9wX3u8u9UA3gJ8uK3zdeBTrf5+4LeSHKB3l9b7h3tpkqSjbc3TU1V16RrLN/dNF3DlCuN2AbsG1O8HXjmg/l3gvLX6kyRNjp8IlyR1ZmhIkjozNCRJnRkakqTODA1JUmeGhiSpM0NDktSZoSFJ6szQkCR1ZmhIkjozNCRJnRkakqTODA1JUmeGhiSpM0NDktSZoSFJ6szQkCR1ZmhIkjozNCRJnRkakqTODA1JUmdrhkaSXUmeTvJQX+2Pknw1yYNJPpnkpL5l70yykOTRJBf01be22kKSq/vqZyS5N8mBJB9LcnyrP6/NL7Tlm4/Wi5YkjabLkcbNwNZltX3AK6vq7wFfA94JkORM4BLgFW2dDyU5LslxwAeBC4EzgUvbWIAPANdV1RbgGeCKVr8CeKaqXgZc18ZJkqZozdCoqs8Ch5bVPl1VR9rsPcCmNr0NuK2qflJV3wAWgHPaY6GqHquqnwK3AduSBPgN4Pa2/m7g4r5t7W7TtwPntfGSpCk5Gtc0fgf4VJveCDzRt2yx1Vaqvxj4Xl8ALdV/bltt+bNtvCRpSjaMs3KSdwFHgFuXSgOGFYPDqVYZv9q2BvWxA9gBMDMzw/z8/MpNr2LmBLjqVUfWHngMrNbz4cOHR35Nx5J9Dce+hmNfw5tEbyOHRpLtwOuB86pq6Yf5InB637BNwJNtelD9O8BJSTa0o4n+8UvbWkyyAXgRy06TLamqncBO
gNnZ2ZqbmxvpNd1w6x6u3T9Wjo7s8cvmVlw2Pz/PqK/pWLKv4djXcOxreJPobaTTU0m2Au8A3lBVP+pbtBe4pN35dAawBfg8cB+wpd0pdTy9i+V7W9jcDbyxrb8d2NO3re1t+o3AZ/rCSZI0BWv+Wp3ko8AccGqSReAaendLPQ/Y165N31NVv1tVDyf5OPAVeqetrqyqn7XtvBW4EzgO2FVVD7eneAdwW5L3Al8Ebmr1m4CPJFmgd4RxyVF4vZKkMawZGlV16YDyTQNqS+PfB7xvQP0O4I4B9cfo3V21vP5j4E1r9SdJmhw/ES5J6szQkCR1ZmhIkjozNCRJnRkakqTODA1JUmeGhiSpM0NDktSZoSFJ6szQkCR1ZmhIkjozNCRJnRkakqTODA1JUmeGhiSpM0NDktSZoSFJ6szQkCR1ZmhIkjozNCRJnRkakqTO1gyNJLuSPJ3kob7aKUn2JTnQvp7c6klyfZKFJA8mOatvne1t/IEk2/vqZyfZ39a5PklWew5J0vR0OdK4Gdi6rHY1cFdVbQHuavMAFwJb2mMHcCP0AgC4BngNcA5wTV8I3NjGLq23dY3nkCRNyZqhUVWfBQ4tK28Ddrfp3cDFffVbquce4KQkpwEXAPuq6lBVPQPsA7a2ZSdW1eeqqoBblm1r0HNIkqZkw4jrzVTVQYCqOpjkJa2+EXiib9xiq61WXxxQX+05niPJDnpHK8zMzDA/Pz/aizoBrnrVkZHWHddqPR8+fHjk13Qs2ddw7Gs49jW8SfQ2amisJANqNUJ9KFW1E9gJMDs7W3Nzc8NuAoAbbt3DtfuP9i7p5vHL5lZcNj8/z6iv6Viyr+HY13Dsa3iT6G3Uu6eeaqeWaF+fbvVF4PS+cZuAJ9eobxpQX+05JElTMmpo7AWW7oDaDuzpq1/e7qI6F3i2nWK6Ezg/ycntAvj5wJ1t2Q+SnNvumrp82bYGPYckaUrWPBeT5KPAHHBqkkV6d0G9H/h4kiuAbwJvasPvAC4CFoAfAW8GqKpDSd4D3NfGvbuqli6uv4XeHVonAJ9qD1Z5DknSlKwZGlV16QqLzhswtoArV9jOLmDXgPr9wCsH1L876DkkSdPjJ8IlSZ0ZGpKkzgwNSVJnhoYkqTNDQ5LUmaEhSerM0JAkdWZoSJI6MzQkSZ0ZGpKkzgwNSVJnhoYkqTNDQ5LUmaEhSerM0JAkdWZoSJI6MzQkSZ0ZGpKkzgwNSVJnhoYkqbOxQiPJv0nycJKHknw0yS8nOSPJvUkOJPlYkuPb2Oe1+YW2fHPfdt7Z6o8muaCvvrXVFpJcPU6vkqTxjRwaSTYC/xqYrapXAscBlwAfAK6rqi3AM8AVbZUrgGeq6mXAdW0cSc5s670C2Ap8KMlxSY4DPghcCJwJXNrGSpKmZNzTUxuAE5JsAJ4PHAR+A7i9Ld8NXNymt7V52vLzkqTVb6uqn1TVN4AF4Jz2WKiqx6rqp8BtbawkaUpGDo2q+hbwH4Fv0guLZ4EHgO9V1ZE2bBHY2KY3Ak+0dY+08S/ury9bZ6W6JGlKNoy6YpKT6f3mfwbwPeDP6Z1KWq6WVllh2Ur1QYFWA2ok2QHsAJiZmWF+fn611lc0cwJc9aojaw88Blbr+fDhwyO/pmPJvoZjX8Oxr+FNoreRQwP4TeAbVfVXAEk+Afwj4KQkG9rRxCbgyTZ+ETgdWGyns14EHOqrL+lfZ6X6z6mqncBOgNnZ2ZqbmxvpBd1w6x6u3T/OLhnd45fNrbhsfn6eUV/TsWRfw7Gv4djX8CbR2zjXNL4JnJvk+e3axHnAV4C7gTe2MduBPW16b5unLf9MVVWrX9LurjoD2AJ8HrgP2NLuxjqe3sXyvWP0K0ka08i/VlfVvUluB74AHAG+SO+3/f8G3Jbkva12U1vlJuAjSRboHWFc0rbzcJKP0wucI8CVVfUzgCRvBe6kd2fWrqp6eNR+JUnjG+tcTFVdA1yzrPwYvTuflo/9MfCmFbbzPuB9A+p3
AHeM06Mk6ejxE+GSpM4MDUlSZ4aGJKkzQ0OS1JmhIUnqzNCQJHVmaEiSOjM0JEmdGRqSpM4MDUlSZ4aGJKkzQ0OS1JmhIUnqzNCQJHVmaEiSOjM0JEmdGRqSpM4MDUlSZ4aGJKkzQ0OS1JmhIUnqbKzQSHJSktuTfDXJI0n+YZJTkuxLcqB9PbmNTZLrkywkeTDJWX3b2d7GH0iyva9+dpL9bZ3rk2ScfiVJ4xn3SONPgP9eVX8H+PvAI8DVwF1VtQW4q80DXAhsaY8dwI0ASU4BrgFeA5wDXLMUNG3Mjr71to7ZryRpDCOHRpITgdcBNwFU1U+r6nvANmB3G7YbuLhNbwNuqZ57gJOSnAZcAOyrqkNV9QywD9jalp1YVZ+rqgJu6duWJGkKxjnSeCnwV8B/TvLFJB9O8gJgpqoOArSvL2njNwJP9K2/2Gqr1RcH1CVJU7JhzHXPAn6vqu5N8if8v1NRgwy6HlEj1J+74WQHvdNYzMzMMD8/v0obK5s5Aa561ZGR1h3Xaj0fPnx45Nd0LNnXcOxrOPY1vEn0Nk5oLAKLVXVvm7+dXmg8leS0qjrYTjE93Tf+9L71NwFPtvrcsvp8q28aMP45qmonsBNgdna25ubmBg1b0w237uHa/ePsktE9ftncisvm5+cZ9TUdS/Y1HPsajn0NbxK9jXx6qqq+DTyR5OWtdB7wFWAvsHQH1HZgT5veC1ze7qI6F3i2nb66Ezg/ycntAvj5wJ1t2Q+SnNvumrq8b1uSpCkY99fq3wNuTXI88BjwZnpB9PEkVwDfBN7Uxt4BXAQsAD9qY6mqQ0neA9zXxr27qg616bcANwMnAJ9qD0nSlIwVGlX1JWB2wKLzBowt4MoVtrML2DWgfj/wynF6lCQdPX4iXJLUmaEhSerM0JAkdWZoSJI6MzQkSZ0ZGpKkzgwNSVJnhoYkqTNDQ5LUmaEhSerM0JAkdWZoSJI6MzQkSZ0ZGpKkzgwNSVJnhoYkqTNDQ5LUmaEhSerM0JAkdWZoSJI6MzQkSZ2NHRpJjkvyxST/tc2fkeTeJAeSfCzJ8a3+vDa/0JZv7tvGO1v90SQX9NW3ttpCkqvH7VWSNJ6jcaTxNuCRvvkPANdV1RbgGeCKVr8CeKaqXgZc18aR5EzgEuAVwFbgQy2IjgM+CFwInAlc2sZKkqZkrNBIsgn4beDDbT7AbwC3tyG7gYvb9LY2T1t+Xhu/Dbitqn5SVd8AFoBz2mOhqh6rqp8Ct7WxkqQp2TDm+v8J+APghW3+xcD3qupIm18ENrbpjcATAFV1JMmzbfxG4J6+bfav88Sy+msGNZFkB7ADYGZmhvn5+ZFezMwJcNWrjqw98BhYrefDhw+P/JqOJfsajn0Nx76GN4neRg6NJK8Hnq6qB5LMLZUHDK01lq1UH3QUVANqVNVOYCfA7Oxszc3NDRq2phtu3cO1+8fN0dE8ftncisvm5+cZ9TUdS/Y1HPsajn0NbxK9jfMT8rXAG5JcBPwycCK9I4+TkmxoRxubgCfb+EXgdGAxyQbgRcChvvqS/nVWqkuSpmDkaxpV9c6q2lRVm+ldyP5MVV0G3A28sQ3bDuxp03vbPG35Z6qqWv2SdnfVGcAW4PPAfcCWdjfW8e059o7aryRpfMfiXMw7gNuSvBf4InBTq98EfCTJAr0jjEsAqurhJB8HvgIcAa6sqp8BJHkrcCdwHLCrqh4+Bv1Kkjo6KqFRVfPAfJt+jN6dT8vH/Bh40wrrvw9434D6HcAdR6NHSdL4/ES4JKkzQ0OS1JmhIUnqzNCQJHVmaEiSOjM0JEmdGRqSpM4MDUlSZ4aGJKkzQ0OS1JmhIUnqzNCQJHVmaEiSOjM0JEmdGRqSpM4MDUlSZ4aGJKkzQ0OS1JmhIUnqzNCQJHU2cmgkOT3J3UkeSfJwkre1+ilJ9iU50L6e3OpJcn2ShSQPJjmrb1vb2/gDSbb31c9Osr+tc32SjPNiJUnjGedI4whwVVX9XeBc4Mok
ZwJXA3dV1RbgrjYPcCGwpT12ADdCL2SAa4DXAOcA1ywFTRuzo2+9rWP0K0ka08ihUVUHq+oLbfoHwCPARmAbsLsN2w1c3Ka3AbdUzz3ASUlOAy4A9lXVoap6BtgHbG3LTqyqz1VVAbf0bUuSNAVH5ZpGks3Aq4F7gZmqOgi9YAFe0oZtBJ7oW22x1VarLw6oS5KmZMO4G0jyK8BfAG+vqu+vctlh0IIaoT6ohx30TmMxMzPD/Pz8Gl0PNnMCXPWqIyOtO67Vej58+PDIr+lYsq/h2Ndw7Gt4k+htrNBI8kv0AuPWqvpEKz+V5LSqOthOMT3d6ovA6X2rbwKebPW5ZfX5Vt80YPxzVNVOYCfA7Oxszc3NDRq2phtu3cO1+8fO0ZE8ftncisvm5+cZ9TUdS/Y1HPsajn0NbxK9jXP3VICbgEeq6o/7Fu0Flu6A2g7s6atf3u6iOhd4tp2+uhM4P8nJ7QL4+cCdbdkPkpzbnuvyvm1JkqZgnF+rXwv8S2B/ki+12r8D3g98PMkVwDeBN7VldwAXAQvAj4A3A1TVoSTvAe5r495dVYfa9FuAm4ETgE+1hyRpSkYOjar6SwZfdwA4b8D4Aq5cYVu7gF0D6vcDrxy1R0nS0eUnwiVJnRkakqTODA1JUmeGhiSpM0NDktSZoSFJ6szQkCR1ZmhIkjozNCRJnRkakqTODA1JUmeGhiSpM0NDktSZoSFJ6szQkCR1ZmhIkjozNCRJnRkakqTODA1JUmeGhiSpM0NDktTZug+NJFuTPJpkIcnV0+5Hkn6RrevQSHIc8EHgQuBM4NIkZ063K0n6xbWuQwM4B1ioqseq6qfAbcC2KfckSb+w1ntobASe6JtfbDVJ0hRsmHYDa8iAWj1nULID2NFmDyd5dMTnOxX4zojrjiUfWHXx1Ppag30Nx76GY1/DG6e3v9Vl0HoPjUXg9L75TcCTywdV1U5g57hPluT+qpoddztHm30Nx76GY1/DWa99wWR6W++np+4DtiQ5I8nxwCXA3in3JEm/sNb1kUZVHUnyVuBO4DhgV1U9POW2JOkX1roODYCqugO4Y0JPN/YprmPEvoZjX8Oxr+Gs175gAr2l6jnXlSVJGmi9X9OQJK0jhkazHv5cSZLTk9yd5JEkDyd5W6v/YZJvJflSe1w0pf4eT7K/9XB/q52SZF+SA+3ryRPu6eV9++VLSb6f5O3T2GdJdiV5OslDfbWB+yc917f324NJzppwX3+U5KvtuT+Z5KRW35zkf/Xttz+dcF8rft+SvLPtr0eTXDDhvj7W19PjSb7U6pPcXyv9fJjse6yqfuEf9C6yfx14KXA88GXgzCn0cRpwVpt+IfA1en8+5Q+B318H++lx4NRltf8AXN2mrwY+MOXv47fp3W8+8X0GvA44C3horf0DXAR8it5nkc4F7p1wX+cDG9r0B/r62tw/bgr7a+D3rf07+DLwPOCM9u/1uEn1tWz5tcC/n8L+Wunnw0TfYx5p9KyLP1dSVQer6gtt+gfAI6z/T8BvA3a36d3AxVPs5Tzg61X1P6fx5FX1WeDQsvJK+2cbcEv13AOclOS0SfVVVZ+uqiNt9h56n4GaqBX210q2AbdV1U+q6hvAAr1/txPtK0mAfwF89Fg892pW+fkw0feYodGz7v5cSZLNwKuBe1vpre0Qc9ekTwH1KeDTSR5I71P4ADNVdRB6b2rgJVPqDXqf4+n/x7we9tlK+2c9ved+h95vpEvOSPLFJP8jya9PoZ9B37f1sr9+HXiqqg701Sa+v5b9fJjoe8zQ6On050omJcmvAH8BvL2qvg/cCPwa8A+Ag/QOj6fhtVV1Fr2/OnxlktdNqY/nSO/Dn28A/ryV1ss+W8m6eM8leRdwBLi1lQ4Cf7OqXg38W+C/JDlxgi2t9H1bF/sLuJSf/8Vk4vtrwM+HFYcOqI29zwyNnk5/rmQSkvwSvTfErVX1CYCqeqqqflZV/wf4M47RYflaqurJ9vVp4JOtj6eW
Dnnb16en0Ru9IPtCVT3VelwX+4yV98/U33NJtgOvBy6rdhK8nf75bpt+gN61g789qZ5W+b6th/21AfhnwMeWapPeX4N+PjDh95ih0bMu/lxJO196E/BIVf1xX73/POQ/BR5avu4EentBkhcuTdO7kPoQvf20vQ3bDuyZdG/Nz/0GuB72WbPS/tkLXN7ucDkXeHbpFMMkJNkKvAN4Q1X9qK/+q+n9PzYkeSmwBXhsgn2t9H3bC1yS5HlJzmh9fX5SfTW/CXy1qhaXCpPcXyv9fGDS77FJXPX/6/Cgd6fB1+j9pvCuKfXwj+kdPj4IfKk9LgI+Auxv9b3AaVPo7aX07l75MvDw0j4CXgzcBRxoX0+ZQm/PB74LvKivNvF9Ri+0DgL/m95veVestH/onTr4YHu/7QdmJ9zXAr3z3Uvvsz9tY/95+/5+GfgC8E8m3NeK3zfgXW1/PQpcOMm+Wv1m4HeXjZ3k/lrp58NE32N+IlyS1JmnpyRJnRkakqTODA1JUmeGhiSpM0NDktSZoSFJ6szQkCR1ZmhIkjr7vw0Okt9zjFqRAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "data.monthly_salary.hist()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It does not look like a normal distribution."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy import stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "NormaltestResult(statistic=80485.89395778668, pvalue=0.0)"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stats.normaltest(data.monthly_salary)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Null hypothesis: the salaries come from a normal distribution.\n",
    "\n",
    "The p-value is reported as 0.0, i.e. it is too small to be represented in floating point.\n",
    "\n",
    "The null hypothesis can therefore be rejected.\n",
    "\n",
    "Conclusion: the data is not normally distributed."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Zoom in"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1eb28bf1278>"
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFkdJREFUeJzt3X+MXfV55/H3U5sfFg7YhGRk2WhNttZuSNxSPEu8YlsNEIH5oZo/wooIFZOlspSSVdp6Vcx2u7QJSE5XLCnalNYbvJhsW0PTRljYLGs5jLLVJvwKBOOw1BNjJQ4IK2tDcZKSneTZP+53wmW+985cz1zPPdjvl3Q15zzne8997rm+85nz415HZiJJUrtfGHQDkqTmMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUmT/oBmbqnHPOyeXLl3dc9sMf/pAzzjhjbhvqUZN7g2b31+TeoNn9Nbk3aHZ/J1JvzzzzzA8y8309Dc7Md+Vt1apV2c3jjz/eddmgNbm3zGb31+TeMpvdX5N7y2x2fydSb8DT2ePvWA8rSZIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqPX19RkQcAN4EfgqMZ+ZwRJwNPAgsBw4A/zozj0REAH8CXAX8CLgpM79Z1rMO+A9ltXdk5tZSXwXcDywAdgKfLp/mU58s37ijp3EbVo5zU49je3Fg09V9W5ekuXMsew6XZOYFmTlc5jcCuzNzBbC7zANcCawot/XAvQAlTG4HPgJcBNweEYvLfe4tYyfut2bGz0iSNGuzOay0FthaprcC17bVHyhf5fENYFFELAGuAHZl5uHMPALsAtaUZWdm5tfL3sIDbeuSJA1A9HL0JiJeBo4ACfx5Zm6OiNczc1HbmCOZuTgiHgE2Zebflfpu4FZgBDg9M+8o9T8AfgyMlvEfLfVfBW7NzGs69LGe1h4GQ0NDq7Zt29ax36NHj7Jw4cLetsAcG1Rve77/Rk/jhhbAaz/u3+OuXHpW39bV5NcVmt1fk3uDZvd3IvV2ySWXPNN29GdKvX5l98WZ+UpEvB/YFRH/Z4qx0aGWM6jXxczNwGaA4eHhHBkZ6djA6Ogo3ZYN2qB66/U8woaV49y1p3/f5H7ghpG+ravJrys0u78m9wbN7u9k7a2nw0qZ+Ur5eQj4Cq1zBq+VQ0KUn4fK8IPAuW13Xwa8Mk19WYe6JGlApg2HiDgjIt4zMQ1cDrwAbAfWlWHrgIfL9HbgxmhZDbyRma8CjwGXR8TiciL6cuCxsuzNiFhdrnS6sW1dkqQB6OX4wRDwldbvbeYDf5mZ/yMingIeioibge8C15XxO2ldxjpG61LWTwBk5uGI+CzwVBn3mcw8XKY/yduXsj5abpKkAZk2HDJzP/DLHer/F7isQz2BW7qsawuwpUP9aeDDPfQrSZoDfkJaklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklTpORwiYl5EPBsRj5T58yLiiYjYFxEPRsSppX5amR8ry5e3reO2Un8pIq5oq68ptbGI2Ni/pydJmolj2XP4NPBi2/zngLszcwVwBLi51G8GjmTmLwJ3l3FExPnA9cCHgDXAn5bAmQd8AbgSOB/4eBkrSRqQnsIhIpYBVwNfLPMBXAp8uQzZClxbpteWecryy8r4tcC2zHwrM18GxoCLym0sM/dn5k+AbWWsJGlAet1z+Dzwe8DPyvx7gdczc7zMHwSWlumlwPcAyvI3yvif1yfdp1tdkjQg86cbEBHXAIcy85mIGJkodxia0yzrVu8UUNmh
RkSsB9YDDA0NMTo62rHno0ePdl02aIPqbcPK8ekHAUMLeh/bi34+1ya/rtDs/prcGzS7v5O1t2nDAbgY+PWIuAo4HTiT1p7EooiYX/YOlgGvlPEHgXOBgxExHzgLONxWn9B+n271d8jMzcBmgOHh4RwZGenY8OjoKN2WDdqgertp446exm1YOc5de3r5Z9GbAzeM9G1dTX5dodn9Nbk3aHZ/J2tv0x5WyszbMnNZZi6ndUL5q5l5A/A48LEybB3wcJneXuYpy7+amVnq15ermc4DVgBPAk8BK8rVT6eWx9jel2cnSZqR2fyJeCuwLSLuAJ4F7iv1+4AvRcQYrT2G6wEyc29EPAR8GxgHbsnMnwJExKeAx4B5wJbM3DuLviRJs3RM4ZCZo8Bomd5P60qjyWP+Ebiuy/3vBO7sUN8J7DyWXiRJx4+fkJYkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVJl2nCIiNMj4smI+FZE7I2IPyr18yLiiYjYFxEPRsSppX5amR8ry5e3reu2Un8pIq5oq68ptbGI2Nj/pylJOha97Dm8BVyamb8MXACsiYjVwOeAuzNzBXAEuLmMvxk4kpm/CNxdxhER5wPXAx8C1gB/GhHzImIe8AXgSuB84ONlrCRpQKYNh2w5WmZPKbcELgW+XOpbgWvL9NoyT1l+WUREqW/LzLcy82VgDLio3MYyc39m/gTYVsZKkgakp3MO5S/854BDwC7gO8DrmTlehhwElpbppcD3AMryN4D3ttcn3adbXZI0IPN7GZSZPwUuiIhFwFeAD3YaVn5Gl2Xd6p0CKjvUiIj1wHqAoaEhRkdHO/Z79OjRrssGbVC9bVg5Pv0gYGhB72N70c/n2uTXFZrdX5N7g2b3d7L21lM4TMjM1yNiFFgNLIqI+WXvYBnwShl2EDgXOBgR84GzgMNt9Qnt9+lWn/z4m4HNAMPDwzkyMtKxz9HRUbotG7RB9XbTxh09jduwcpy79hzTP4spHbhhpG/ravLrCs3ur8m9QbP7O1l76+VqpfeVPQYiYgHwUeBF4HHgY2XYOuDhMr29zFOWfzUzs9SvL1cznQesAJ4EngJWlKufTqV10np7P56cJGlmevkTcQmwtVxV9AvAQ5n5SER8G9gWEXcAzwL3lfH3AV+KiDFaewzXA2Tm3oh4CPg2MA7cUg5XERGfAh4D5gFbMnNv356hJOmYTRsOmfk88Csd6vtpXWk0uf6PwHVd1nUncGeH+k5gZw/9SpLmgJ+QliRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUqV/H4XVtJZv3MGGleM9f1pZkgbFPQdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUmXacIiIcyPi8Yh4MSL2RsSnS/3siNgVEfvKz8WlHhFxT0SMRcTzEXFh27rWlfH7ImJdW31VROwp97knIuJ4PFlJUm962XMYBzZk5geB1cAtEXE+sBHYnZkrgN1lHuBKYEW5rQfuhVaYALcDHwEuAm6fCJQyZn3b/dbM/qlJkmZq2nDIzFcz85tl+k3gRWApsBbYWoZtBa4t02uBB7LlG8CiiFgCXAHsyszDmXkE2AWsKcvOzMyvZ2YCD7StS5I0ANH6fdzj4IjlwNeADwPfzcxFbcuOZObiiHgE2JSZf1fqu4FbgRHg9My8o9T/APgxMFrGf7TUfxW4NTOv6fD462ntYTA0NLRq27ZtHfs8evQoCxcu7Pl5zZU9
33+DoQXw2o8H3Ul3/e5v5dKz+raupr6uE5rcX5N7g2b3dyL1dskllzyTmcO9jJ3f60ojYiHwN8BvZ+Y/THFaoNOCnEG9LmZuBjYDDA8P58jISMcGRkdH6bZskG7auIMNK8e5a0/Pm33O9bu/AzeM9G1dTX1dJzS5vyb3Bs3u72TtraffAhFxCq1g+IvM/NtSfi0ilmTmq+XQ0KFSPwic23b3ZcArpT4yqT5a6ss6jJdmZfnGHXP6eBtWjnPTxh0c2HT1nD6udDz0crVSAPcBL2bmf25btB2YuOJoHfBwW/3GctXSauCNzHwVeAy4PCIWlxPRlwOPlWVvRsTq8lg3tq1LkjQAvew5XAz8BrAnIp4rtX8PbAIeioibge8C15VlO4GrgDHgR8AnADLzcER8FniqjPtMZh4u058E7gcWAI+WmyRpQKYNh3JiudsJhss6jE/gli7r2gJs6VB/mtZJbklSA/gJaUlSxXCQJFUMB0lSxXCQJFUMB0lSpbkf1dUJoZ8fRJv4kJmk4889B0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSZdpwiIgtEXEoIl5oq50dEbsiYl/5ubjUIyLuiYixiHg+Ii5su8+6Mn5fRKxrq6+KiD3lPvdERPT7SUqSjk0vew73A2sm1TYCuzNzBbC7zANcCawot/XAvdAKE+B24CPARcDtE4FSxqxvu9/kx5IkzbFpwyEzvwYcnlReC2wt01uBa9vqD2TLN4BFEbEEuALYlZmHM/MIsAtYU5admZlfz8wEHmhblyRpQGZ6zmEoM18FKD/fX+pLge+1jTtYalPVD3aoS5IGaH6f19fpfEHOoN555RHraR2CYmhoiNHR0Y7jjh492nXZIG1YOc7QgtbPpmpyf03uDd7ur4n/9pr6npjQ5P5O1t5mGg6vRcSSzHy1HBo6VOoHgXPbxi0DXin1kUn10VJf1mF8R5m5GdgMMDw8nCMjIx3HjY6O0m3ZIN20cQcbVo5z155+Z3L/NLm/JvcGb/d34IaRQbdSaep7YkKT+ztZe5vpYaXtwMQVR+uAh9vqN5arllYDb5TDTo8Bl0fE4nIi+nLgsbLszYhYXa5SurFtXZKkAZn2z7CI+Ctaf/WfExEHaV11tAl4KCJuBr4LXFeG7wSuAsaAHwGfAMjMwxHxWeCpMu4zmTlxkvuTtK6IWgA8Wm6SpAGaNhwy8+NdFl3WYWwCt3RZzxZgS4f608CHp+tDkjR3/IS0JKliOEiSKoaDJKnS3OsCpXep5Rt3DORxD2y6eiCPqxOTew6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpMpJ+d1Kg/ruG0l6t3DPQZJUOSn3HKQT0VR7xBtWjnPTcdxj9hthTzzuOUiSKoaDJKliOEiSKoaDJKliOEiSKoaDJKliOEiSKn7OQdKszfZbB2b6OQw/X3H8uOcgSaoYDpKkiuEgSap4zkHSu9ZcfMNyt/MhJ/r5DvccJEkVw0GSVGnMYaWIWAP8CTAP+GJmbhpwS5LU1aD+07C5OpzViD2HiJgHfAG4Ejgf+HhEnD/YriTp5NWIcAAuAsYyc39m/gTYBqwdcE+SdNJqSjgsBb7XNn+w1CRJAxCZOegeiIjrgCsy8zfL/G8AF2Xmv500bj2wvsz+M+ClLqs8B/jBcWp3tprcGzS7vyb3Bs3ur8m9QbP7O5F6+yeZ+b5eBjblhPRB4Ny2+WXAK5MHZeZmYPN0K4uIpzNzuH/t9U+Te4Nm99fk3qDZ/TW5N2h2fydrb005rPQUsCIizouIU4Hrge0D7kmSTlqN2HPIzPGI+BTwGK1LWbdk5t4BtyVJJ61GhANAZu4EdvZpddMeehqgJvcG
ze6vyb1Bs/trcm/Q7P5Oyt4acUJaktQsTTnnIElqkBMqHCJiTUS8FBFjEbFxjh/7QETsiYjnIuLpUjs7InZFxL7yc3GpR0TcU/p8PiIubFvPujJ+X0Ssm2EvWyLiUES80FbrWy8Rsao817Fy3+hDf38YEd8v2++5iLiqbdlt5bFeiogr2uodX+9yYcMTpe8Hy0UOvfZ2bkQ8HhEvRsTeiPh0U7bfFL01ZdudHhFPRsS3Sn9/NNU6I+K0Mj9Wli+fad+z6O3+iHi5bdtdUOqDeF/Mi4hnI+KRRmy3zDwhbrROZH8H+ABwKvAt4Pw5fPwDwDmTan8MbCzTG4HPlemrgEeBAFYDT5T62cD+8nNxmV48g15+DbgQeOF49AI8CfzLcp9HgSv70N8fAv+uw9jzy2t5GnBeeY3nTfV6Aw8B15fpPwM+eQy9LQEuLNPvAf6+9DDw7TdFb03ZdgEsLNOnAE+UbdJxncBvAX9Wpq8HHpxp37Po7X7gYx3GD+J98bvAXwKPTPVazNV2O5H2HJr4FRxrga1leitwbVv9gWz5BrAoIpYAVwC7MvNwZh4BdgFrjvVBM/NrwOHj0UtZdmZmfj1b/yIfaFvXbPrrZi2wLTPfysyXgTFar3XH17v8tXYp8OUOz7WX3l7NzG+W6TeBF2l9Wn/g22+K3rqZ622XmXm0zJ5SbjnFOtu36ZeBy0oPx9T3LHvrZk7fFxGxDLga+GKZn+q1mJPtdiKFw6C/giOB/xkRz0Trk9wAQ5n5KrTe2MD7S71br8fzOfSrl6Vl+nj0+KmyC78lymGbGfT3XuD1zByfbX9ld/1XaP2V2ajtN6k3aMi2K4dGngMO0frF+Z0p1vnzPsryN0oPx+X9Mbm3zJzYdneWbXd3RJw2ubcee5jt6/p54PeAn5X5qV6LOdluJ1I4dDq+N5eXYl2cmRfS+mbZWyLi16YY263XQTyHY+3lePV4L/BPgQuAV4G7BtlfRCwE/gb47cz8h6mGznV/HXprzLbLzJ9m5gW0vuXgIuCDU6xzTvub3FtEfBi4DfjnwL+gdajo1rnuLSKuAQ5l5jPt5SnWNye9nUjh0NNXcBwvmflK+XkI+AqtN8ZrZXeT8vPQNL0ez+fQr14Olum+9piZr5U378+A/0pr+82kvx/QOgQwf1K9ZxFxCq1fvn+RmX9byo3Yfp16a9K2m5CZrwOjtI7Xd1vnz/soy8+idbjxuL4/2npbUw7VZWa+Bfw3Zr7tZvO6Xgz8ekQcoHXI51JaexKD3W7TnZR4t9xofaBvP60TMRMnXT40R499BvCetun/TetcwX/inScx/7hMX807T3Y9mW+f7HqZ1omuxWX67Bn2tJx3nvDtWy+0vu5kNW+feLuqD/0taZv+HVrHTgE+xDtPsu2ndYKt6+sN/DXvPJH3W8fQV9A6Xvz5SfWBb78pemvKtnsfsKhMLwD+F3BNt3UCt/DOE6sPzbTvWfS2pG3bfh7YNOD3xQhvn5Ae6HY77r845/JG6wqDv6d1nPP35/BxP1A2+LeAvROPTes44G5gX/k58Y8oaP3nRt8B9gDDbev6N7ROJI0Bn5hhP39F6/DC/6P1V8PN/ewFGAZeKPf5L5QPU86yvy+Vx3+e1vdqtf/C+/3yWC/RdgVIt9e7vB5Plr7/GjjtGHr7V7R2uZ8Hniu3q5qw/aborSnb7peAZ0sfLwD/cap1AqeX+bGy/AMz7XsWvX21bLsXgP/O21c0zfn7oqxjhLfDYaDbzU9IS5IqJ9I5B0lSnxgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqTK/wciBVVS/D/g4wAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "data[data.monthly_salary<40000].monthly_salary.hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12500.0"
      ]
     },
     "execution_count": 118,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.monthly_salary.median()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "13755.868728773368"
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.monthly_salary.mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Role"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_sub_stats(data, prefix):\n",
    "    \n",
    "    features = [feature for feature in data.columns if feature.startswith(prefix)]\n",
    "    salary_mean=[]\n",
    "    salary_median=[]\n",
    "    count=[]\n",
    "    for feature in features:\n",
    "        salary_mean.append(data[data[feature]==1].monthly_salary.mean())\n",
    "        salary_median.append(data[data[feature]==1].monthly_salary.median())\n",
    "        count.append(data[data[feature]==1].shape[0])\n",
    "    sub_data=pd.DataFrame()\n",
    "    sub_data[prefix]=features\n",
    "    sub_data['salary_mean']=salary_mean\n",
    "    sub_data['salary_median']=salary_median\n",
    "    sub_data['head_count']=count\n",
    "    sub_data['percentage']=count/np.sum(count)\n",
    "    sub_data=sub_data.sort_values(by='salary_mean', ascending=False)\n",
    "\n",
    "    return sub_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>career</th>\n",
       "      <th>salary_mean</th>\n",
       "      <th>salary_median</th>\n",
       "      <th>head_count</th>\n",
       "      <th>percentage</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>career_architect</td>\n",
       "      <td>21490.567677</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>6233</td>\n",
       "      <td>0.038575</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>career_algorithm</td>\n",
       "      <td>20325.387597</td>\n",
       "      <td>18000.0</td>\n",
       "      <td>15781</td>\n",
       "      <td>0.097667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>career_software_engineer</td>\n",
       "      <td>13755.868729</td>\n",
       "      <td>12500.0</td>\n",
       "      <td>139566</td>\n",
       "      <td>0.863758</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     career   salary_mean  salary_median  head_count  \\\n",
       "1          career_architect  21490.567677        20000.0        6233   \n",
       "0          career_algorithm  20325.387597        18000.0       15781   \n",
       "2  career_software_engineer  13755.868729        12500.0      139566   \n",
       "\n",
       "   percentage  \n",
       "1    0.038575  \n",
       "0    0.097667  \n",
       "2    0.863758  "
      ]
     },
     "execution_count": 133,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_career=get_sub_stats(data,'career')\n",
    "data_career"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "由表可知，架构师的平均工资为2.1万，算法工程师为2万，普通程序员为1.4万。\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "F_onewayResult(statistic=9378.448428344665, pvalue=0.0)"
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Monthly salaries of the rows flagged (== 1) in each career column.\n",
    "career_architect_salary = data.loc[data['career_architect'] == 1, 'monthly_salary']\n",
    "career_algorithm_salary = data.loc[data['career_algorithm'] == 1, 'monthly_salary']\n",
    "career_software_engineer_salary = data.loc[\n",
    "    data['career_software_engineer'] == 1, 'monthly_salary'\n",
    "]\n",
    "# One-way ANOVA on two groups: algorithm vs. software-engineer salaries.\n",
    "stats.f_oneway(career_algorithm_salary, career_software_engineer_salary)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "F_onewayResult(statistic=49.63271312626023, pvalue=1.908616793279803e-12)"
      ]
     },
     "execution_count": 140,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stats.f_oneway(career_architect_salary, career_algorithm_salary)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "统计表明，架构师、算法工程师、普通程序员之间的平均工资差异是显著的。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Where are the Developers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "      <th>head_count</th>\n",
       "      <th>percentage</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>city_beijing</td>\n",
       "      <td>17453.108346</td>\n",
       "      <td>13823</td>\n",
       "      <td>0.099043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>city_shanghai</td>\n",
       "      <td>16626.365819</td>\n",
       "      <td>25888</td>\n",
       "      <td>0.185489</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>city_shenzhen</td>\n",
       "      <td>15631.343242</td>\n",
       "      <td>20757</td>\n",
       "      <td>0.148725</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>city_hangzhou</td>\n",
       "      <td>14976.923667</td>\n",
       "      <td>8690</td>\n",
       "      <td>0.062264</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>city_guangzhou</td>\n",
       "      <td>13031.734825</td>\n",
       "      <td>17068</td>\n",
       "      <td>0.122293</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>city_nanjing</td>\n",
       "      <td>12874.243172</td>\n",
       "      <td>7091</td>\n",
       "      <td>0.050808</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>city_chengdu</td>\n",
       "      <td>11895.973250</td>\n",
       "      <td>7028</td>\n",
       "      <td>0.050356</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>city_dongguan</td>\n",
       "      <td>11352.660335</td>\n",
       "      <td>1632</td>\n",
       "      <td>0.011693</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>city_xian</td>\n",
       "      <td>11075.391338</td>\n",
       "      <td>3833</td>\n",
       "      <td>0.027464</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>city_wuhan</td>\n",
       "      <td>11074.196462</td>\n",
       "      <td>8027</td>\n",
       "      <td>0.057514</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>city_tianjin</td>\n",
       "      <td>10987.852368</td>\n",
       "      <td>1147</td>\n",
       "      <td>0.008218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>city_changsha</td>\n",
       "      <td>10847.045816</td>\n",
       "      <td>3354</td>\n",
       "      <td>0.024032</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>city_ningbo</td>\n",
       "      <td>10809.416386</td>\n",
       "      <td>1485</td>\n",
       "      <td>0.010640</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>city_fuzhou</td>\n",
       "      <td>10062.398082</td>\n",
       "      <td>2085</td>\n",
       "      <td>0.014939</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>city_dalian</td>\n",
       "      <td>10030.425753</td>\n",
       "      <td>2889</td>\n",
       "      <td>0.020700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>city_chongqing</td>\n",
       "      <td>10016.517510</td>\n",
       "      <td>2570</td>\n",
       "      <td>0.018414</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>city_qingdao</td>\n",
       "      <td>9548.856799</td>\n",
       "      <td>1385</td>\n",
       "      <td>0.009924</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>city_jinan</td>\n",
       "      <td>9375.090151</td>\n",
       "      <td>1479</td>\n",
       "      <td>0.010597</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>city_hefei</td>\n",
       "      <td>9251.720395</td>\n",
       "      <td>3071</td>\n",
       "      <td>0.022004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>city_changchun</td>\n",
       "      <td>8579.913138</td>\n",
       "      <td>614</td>\n",
       "      <td>0.004399</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>city_kuming</td>\n",
       "      <td>8457.888229</td>\n",
       "      <td>1011</td>\n",
       "      <td>0.007244</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>city_zhengzhou</td>\n",
       "      <td>8348.115379</td>\n",
       "      <td>2317</td>\n",
       "      <td>0.016601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>city_shenyang</td>\n",
       "      <td>7850.849403</td>\n",
       "      <td>1452</td>\n",
       "      <td>0.010404</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>city_harbin</td>\n",
       "      <td>6402.624521</td>\n",
       "      <td>870</td>\n",
       "      <td>0.006234</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              city        salary  head_count  percentage\n",
       "0     city_beijing  17453.108346       13823    0.099043\n",
       "17   city_shanghai  16626.365819       25888    0.185489\n",
       "19   city_shenzhen  15631.343242       20757    0.148725\n",
       "9    city_hangzhou  14976.923667        8690    0.062264\n",
       "8   city_guangzhou  13031.734825       17068    0.122293\n",
       "14    city_nanjing  12874.243172        7091    0.050808\n",
       "3     city_chengdu  11895.973250        7028    0.050356\n",
       "6    city_dongguan  11352.660335        1632    0.011693\n",
       "22       city_xian  11075.391338        3833    0.027464\n",
       "21      city_wuhan  11074.196462        8027    0.057514\n",
       "20    city_tianjin  10987.852368        1147    0.008218\n",
       "2    city_changsha  10847.045816        3354    0.024032\n",
       "15     city_ningbo  10809.416386        1485    0.010640\n",
       "7      city_fuzhou  10062.398082        2085    0.014939\n",
       "5      city_dalian  10030.425753        2889    0.020700\n",
       "4   city_chongqing  10016.517510        2570    0.018414\n",
       "16    city_qingdao   9548.856799        1385    0.009924\n",
       "12      city_jinan   9375.090151        1479    0.010597\n",
       "11      city_hefei   9251.720395        3071    0.022004\n",
       "1   city_changchun   8579.913138         614    0.004399\n",
       "13     city_kuming   8457.888229        1011    0.007244\n",
       "23  city_zhengzhou   8348.115379        2317    0.016601\n",
       "18   city_shenyang   7850.849403        1452    0.010404\n",
       "10     city_harbin   6402.624521         870    0.006234"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_city=get_sub_stats(data,'city')\n",
    "data_city"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "程序员收入最高的城市依次为：北京，上海，深圳，杭州，广州，南京，成都，东莞等。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "      <th>head_count</th>\n",
       "      <th>percentage</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>city_shanghai</td>\n",
       "      <td>16626.365819</td>\n",
       "      <td>25888</td>\n",
       "      <td>0.185489</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>city_shenzhen</td>\n",
       "      <td>15631.343242</td>\n",
       "      <td>20757</td>\n",
       "      <td>0.148725</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>city_guangzhou</td>\n",
       "      <td>13031.734825</td>\n",
       "      <td>17068</td>\n",
       "      <td>0.122293</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>city_beijing</td>\n",
       "      <td>17453.108346</td>\n",
       "      <td>13823</td>\n",
       "      <td>0.099043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>city_hangzhou</td>\n",
       "      <td>14976.923667</td>\n",
       "      <td>8690</td>\n",
       "      <td>0.062264</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>city_wuhan</td>\n",
       "      <td>11074.196462</td>\n",
       "      <td>8027</td>\n",
       "      <td>0.057514</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>city_nanjing</td>\n",
       "      <td>12874.243172</td>\n",
       "      <td>7091</td>\n",
       "      <td>0.050808</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>city_chengdu</td>\n",
       "      <td>11895.973250</td>\n",
       "      <td>7028</td>\n",
       "      <td>0.050356</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>city_xian</td>\n",
       "      <td>11075.391338</td>\n",
       "      <td>3833</td>\n",
       "      <td>0.027464</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>city_changsha</td>\n",
       "      <td>10847.045816</td>\n",
       "      <td>3354</td>\n",
       "      <td>0.024032</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>city_hefei</td>\n",
       "      <td>9251.720395</td>\n",
       "      <td>3071</td>\n",
       "      <td>0.022004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>city_dalian</td>\n",
       "      <td>10030.425753</td>\n",
       "      <td>2889</td>\n",
       "      <td>0.020700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>city_chongqing</td>\n",
       "      <td>10016.517510</td>\n",
       "      <td>2570</td>\n",
       "      <td>0.018414</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>city_zhengzhou</td>\n",
       "      <td>8348.115379</td>\n",
       "      <td>2317</td>\n",
       "      <td>0.016601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>city_fuzhou</td>\n",
       "      <td>10062.398082</td>\n",
       "      <td>2085</td>\n",
       "      <td>0.014939</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>city_dongguan</td>\n",
       "      <td>11352.660335</td>\n",
       "      <td>1632</td>\n",
       "      <td>0.011693</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>city_ningbo</td>\n",
       "      <td>10809.416386</td>\n",
       "      <td>1485</td>\n",
       "      <td>0.010640</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>city_jinan</td>\n",
       "      <td>9375.090151</td>\n",
       "      <td>1479</td>\n",
       "      <td>0.010597</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>city_shenyang</td>\n",
       "      <td>7850.849403</td>\n",
       "      <td>1452</td>\n",
       "      <td>0.010404</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>city_qingdao</td>\n",
       "      <td>9548.856799</td>\n",
       "      <td>1385</td>\n",
       "      <td>0.009924</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>city_tianjin</td>\n",
       "      <td>10987.852368</td>\n",
       "      <td>1147</td>\n",
       "      <td>0.008218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>city_kuming</td>\n",
       "      <td>8457.888229</td>\n",
       "      <td>1011</td>\n",
       "      <td>0.007244</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>city_harbin</td>\n",
       "      <td>6402.624521</td>\n",
       "      <td>870</td>\n",
       "      <td>0.006234</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>city_changchun</td>\n",
       "      <td>8579.913138</td>\n",
       "      <td>614</td>\n",
       "      <td>0.004399</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              city        salary  head_count  percentage\n",
       "17   city_shanghai  16626.365819       25888    0.185489\n",
       "19   city_shenzhen  15631.343242       20757    0.148725\n",
       "8   city_guangzhou  13031.734825       17068    0.122293\n",
       "0     city_beijing  17453.108346       13823    0.099043\n",
       "9    city_hangzhou  14976.923667        8690    0.062264\n",
       "21      city_wuhan  11074.196462        8027    0.057514\n",
       "14    city_nanjing  12874.243172        7091    0.050808\n",
       "3     city_chengdu  11895.973250        7028    0.050356\n",
       "22       city_xian  11075.391338        3833    0.027464\n",
       "2    city_changsha  10847.045816        3354    0.024032\n",
       "11      city_hefei   9251.720395        3071    0.022004\n",
       "5      city_dalian  10030.425753        2889    0.020700\n",
       "4   city_chongqing  10016.517510        2570    0.018414\n",
       "23  city_zhengzhou   8348.115379        2317    0.016601\n",
       "7      city_fuzhou  10062.398082        2085    0.014939\n",
       "6    city_dongguan  11352.660335        1632    0.011693\n",
       "15     city_ningbo  10809.416386        1485    0.010640\n",
       "12      city_jinan   9375.090151        1479    0.010597\n",
       "18   city_shenyang   7850.849403        1452    0.010404\n",
       "16    city_qingdao   9548.856799        1385    0.009924\n",
       "20    city_tianjin  10987.852368        1147    0.008218\n",
       "13     city_kuming   8457.888229        1011    0.007244\n",
       "10     city_harbin   6402.624521         870    0.006234\n",
       "1   city_changchun   8579.913138         614    0.004399"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_city.sort_values(by='head_count', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "四个一线城市的程序员就业机会占24座城市的55.56%\n"
     ]
    }
   ],
   "source": [
    "first_tier_percentage=data_city[data_city.city.isin(['city_beijing','city_shanghai','city_guangzhou','city_shenzhen'])].percentage.sum()\n",
    "print('四个一线城市的程序员就业机会占{0}座城市的{1:.2%}'.format(data_city.shape[0],first_tier_percentage))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "由此可见，程序员向一线城市集中的趋势十分明显。\n",
    "\n",
    "按照程序员就业机会数量（head_count），重新划分城市等级：\n",
    "\n",
    "一线：北京，上海，广州，深圳\n",
    "\n",
    "二线：杭州，武汉，南京，成都\n",
    "\n",
    "三线：西安，长沙，合肥，大连，重庆，郑州，福州，东莞，宁波，济南，沈阳，青岛，天津，昆明\n",
    "\n",
    "四线：哈尔滨，长春等\n",
    "\n",
    "五线：其余未列出的城市"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Database Popularity\n",
    "### Database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>db</th>\n",
       "      <th>salary</th>\n",
       "      <th>head_count</th>\n",
       "      <th>percentage</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>db_Netezza</td>\n",
       "      <td>27300.000000</td>\n",
       "      <td>5</td>\n",
       "      <td>0.000045</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>db_Ingres</td>\n",
       "      <td>21500.000000</td>\n",
       "      <td>5</td>\n",
       "      <td>0.000045</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>db_Neo4j</td>\n",
       "      <td>20033.088235</td>\n",
       "      <td>102</td>\n",
       "      <td>0.000923</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>db_DynamoDB</td>\n",
       "      <td>19197.916667</td>\n",
       "      <td>24</td>\n",
       "      <td>0.000217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>db_CouchBase</td>\n",
       "      <td>18709.459459</td>\n",
       "      <td>37</td>\n",
       "      <td>0.000335</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>db_Hbase</td>\n",
       "      <td>18531.896909</td>\n",
       "      <td>3214</td>\n",
       "      <td>0.029089</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>db_Elasticsearch</td>\n",
       "      <td>18008.434764</td>\n",
       "      <td>1561</td>\n",
       "      <td>0.014128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>db_CouchDB</td>\n",
       "      <td>17904.761905</td>\n",
       "      <td>21</td>\n",
       "      <td>0.000190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>db_Solr</td>\n",
       "      <td>17568.148926</td>\n",
       "      <td>714</td>\n",
       "      <td>0.006462</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>db_Splunk</td>\n",
       "      <td>17482.142857</td>\n",
       "      <td>28</td>\n",
       "      <td>0.000253</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>db_MongoDB</td>\n",
       "      <td>16451.658177</td>\n",
       "      <td>6282</td>\n",
       "      <td>0.056856</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>db_SAP_HANA</td>\n",
       "      <td>16116.666667</td>\n",
       "      <td>30</td>\n",
       "      <td>0.000272</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>db_Redis</td>\n",
       "      <td>16073.004824</td>\n",
       "      <td>14581</td>\n",
       "      <td>0.131967</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>db_Memcached</td>\n",
       "      <td>15968.959878</td>\n",
       "      <td>2293</td>\n",
       "      <td>0.020753</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>db_Teradata</td>\n",
       "      <td>15723.744292</td>\n",
       "      <td>146</td>\n",
       "      <td>0.001321</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>db_Riak</td>\n",
       "      <td>15625.000000</td>\n",
       "      <td>4</td>\n",
       "      <td>0.000036</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>db_MariaDB</td>\n",
       "      <td>15254.545455</td>\n",
       "      <td>110</td>\n",
       "      <td>0.000996</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>db_Firebase</td>\n",
       "      <td>15166.666667</td>\n",
       "      <td>6</td>\n",
       "      <td>0.000054</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>db_PostgreSQL</td>\n",
       "      <td>14929.289676</td>\n",
       "      <td>1863</td>\n",
       "      <td>0.016861</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>db_MySQL</td>\n",
       "      <td>13777.641738</td>\n",
       "      <td>36264</td>\n",
       "      <td>0.328211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>db_SQLite</td>\n",
       "      <td>12880.956532</td>\n",
       "      <td>1434</td>\n",
       "      <td>0.012979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>db_Oracle</td>\n",
       "      <td>12653.985456</td>\n",
       "      <td>28236</td>\n",
       "      <td>0.255553</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>db_DB2</td>\n",
       "      <td>12639.861919</td>\n",
       "      <td>2752</td>\n",
       "      <td>0.024907</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>db_Sybase</td>\n",
       "      <td>12061.340206</td>\n",
       "      <td>582</td>\n",
       "      <td>0.005267</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>db_SQL_Server</td>\n",
       "      <td>11694.509873</td>\n",
       "      <td>9977</td>\n",
       "      <td>0.090298</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>db_dBase</td>\n",
       "      <td>11583.333333</td>\n",
       "      <td>6</td>\n",
       "      <td>0.000054</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>db_Firebird</td>\n",
       "      <td>11000.000000</td>\n",
       "      <td>7</td>\n",
       "      <td>0.000063</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>db_Informix</td>\n",
       "      <td>10912.378641</td>\n",
       "      <td>206</td>\n",
       "      <td>0.001864</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>db_Apache_Hive</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>db_FileMaker</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  db        salary  head_count  percentage\n",
       "17        db_Netezza  27300.000000           5    0.000045\n",
       "11         db_Ingres  21500.000000           5    0.000045\n",
       "16          db_Neo4j  20033.088235         102    0.000923\n",
       "4        db_DynamoDB  19197.916667          24    0.000217\n",
       "1       db_CouchBase  18709.459459          37    0.000335\n",
       "9           db_Hbase  18531.896909        3214    0.029089\n",
       "5   db_Elasticsearch  18008.434764        1561    0.014128\n",
       "2         db_CouchDB  17904.761905          21    0.000190\n",
       "25           db_Solr  17568.148926         714    0.006462\n",
       "26         db_Splunk  17482.142857          28    0.000253\n",
       "14        db_MongoDB  16451.658177        6282    0.056856\n",
       "22       db_SAP_HANA  16116.666667          30    0.000272\n",
       "20          db_Redis  16073.004824       14581    0.131967\n",
       "13      db_Memcached  15968.959878        2293    0.020753\n",
       "28       db_Teradata  15723.744292         146    0.001321\n",
       "21           db_Riak  15625.000000           4    0.000036\n",
       "12        db_MariaDB  15254.545455         110    0.000996\n",
       "7        db_Firebase  15166.666667           6    0.000054\n",
       "19     db_PostgreSQL  14929.289676        1863    0.016861\n",
       "15          db_MySQL  13777.641738       36264    0.328211\n",
       "24         db_SQLite  12880.956532        1434    0.012979\n",
       "18         db_Oracle  12653.985456       28236    0.255553\n",
       "3             db_DB2  12639.861919        2752    0.024907\n",
       "27         db_Sybase  12061.340206         582    0.005267\n",
       "23     db_SQL_Server  11694.509873        9977    0.090298\n",
       "29          db_dBase  11583.333333           6    0.000054\n",
       "8        db_Firebird  11000.000000           7    0.000063\n",
       "10       db_Informix  10912.378641         206    0.001864\n",
       "0     db_Apache_Hive           NaN           0    0.000000\n",
       "6       db_FileMaker           NaN           0    0.000000"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_db=get_sub_stats(data,'db')\n",
    "data_db"
   ]
  },
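  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "数据显示，掌握Netezza，Ingres，Neo4j，DynamoDB，CouchBase等数据库技术，可以带来高收入。\n",
    "\n",
    "需要注意的是，这些数据库的head_count都很小（Netezza和Ingres各为5，DynamoDB为24，CouchBase为37，Neo4j为102），结论的可靠性有限；在head_count超过1000的数据库中，薪资最高的是Hbase和Elasticsearch。"
   ]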
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>db</th>\n",
       "      <th>salary</th>\n",
       "      <th>head_count</th>\n",
       "      <th>percentage</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>db_MySQL</td>\n",
       "      <td>13777.641738</td>\n",
       "      <td>36264</td>\n",
       "      <td>0.328211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>db_Oracle</td>\n",
       "      <td>12653.985456</td>\n",
       "      <td>28236</td>\n",
       "      <td>0.255553</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>db_Redis</td>\n",
       "      <td>16073.004824</td>\n",
       "      <td>14581</td>\n",
       "      <td>0.131967</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>db_SQL_Server</td>\n",
       "      <td>11694.509873</td>\n",
       "      <td>9977</td>\n",
       "      <td>0.090298</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>db_MongoDB</td>\n",
       "      <td>16451.658177</td>\n",
       "      <td>6282</td>\n",
       "      <td>0.056856</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>db_Hbase</td>\n",
       "      <td>18531.896909</td>\n",
       "      <td>3214</td>\n",
       "      <td>0.029089</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>db_DB2</td>\n",
       "      <td>12639.861919</td>\n",
       "      <td>2752</td>\n",
       "      <td>0.024907</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>db_Memcached</td>\n",
       "      <td>15968.959878</td>\n",
       "      <td>2293</td>\n",
       "      <td>0.020753</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>db_PostgreSQL</td>\n",
       "      <td>14929.289676</td>\n",
       "      <td>1863</td>\n",
       "      <td>0.016861</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>db_Elasticsearch</td>\n",
       "      <td>18008.434764</td>\n",
       "      <td>1561</td>\n",
       "      <td>0.014128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>db_SQLite</td>\n",
       "      <td>12880.956532</td>\n",
       "      <td>1434</td>\n",
       "      <td>0.012979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>db_Solr</td>\n",
       "      <td>17568.148926</td>\n",
       "      <td>714</td>\n",
       "      <td>0.006462</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>db_Sybase</td>\n",
       "      <td>12061.340206</td>\n",
       "      <td>582</td>\n",
       "      <td>0.005267</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>db_Informix</td>\n",
       "      <td>10912.378641</td>\n",
       "      <td>206</td>\n",
       "      <td>0.001864</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>db_Teradata</td>\n",
       "      <td>15723.744292</td>\n",
       "      <td>146</td>\n",
       "      <td>0.001321</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>db_MariaDB</td>\n",
       "      <td>15254.545455</td>\n",
       "      <td>110</td>\n",
       "      <td>0.000996</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>db_Neo4j</td>\n",
       "      <td>20033.088235</td>\n",
       "      <td>102</td>\n",
       "      <td>0.000923</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>db_CouchBase</td>\n",
       "      <td>18709.459459</td>\n",
       "      <td>37</td>\n",
       "      <td>0.000335</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>db_SAP_HANA</td>\n",
       "      <td>16116.666667</td>\n",
       "      <td>30</td>\n",
       "      <td>0.000272</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>db_Splunk</td>\n",
       "      <td>17482.142857</td>\n",
       "      <td>28</td>\n",
       "      <td>0.000253</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>db_DynamoDB</td>\n",
       "      <td>19197.916667</td>\n",
       "      <td>24</td>\n",
       "      <td>0.000217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>db_CouchDB</td>\n",
       "      <td>17904.761905</td>\n",
       "      <td>21</td>\n",
       "      <td>0.000190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>db_Firebird</td>\n",
       "      <td>11000.000000</td>\n",
       "      <td>7</td>\n",
       "      <td>0.000063</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>db_Firebase</td>\n",
       "      <td>15166.666667</td>\n",
       "      <td>6</td>\n",
       "      <td>0.000054</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>db_dBase</td>\n",
       "      <td>11583.333333</td>\n",
       "      <td>6</td>\n",
       "      <td>0.000054</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>db_Ingres</td>\n",
       "      <td>21500.000000</td>\n",
       "      <td>5</td>\n",
       "      <td>0.000045</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>db_Netezza</td>\n",
       "      <td>27300.000000</td>\n",
       "      <td>5</td>\n",
       "      <td>0.000045</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>db_Riak</td>\n",
       "      <td>15625.000000</td>\n",
       "      <td>4</td>\n",
       "      <td>0.000036</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>db_Apache_Hive</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>db_FileMaker</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  db        salary  head_count  percentage\n",
       "15          db_MySQL  13777.641738       36264    0.328211\n",
       "18         db_Oracle  12653.985456       28236    0.255553\n",
       "20          db_Redis  16073.004824       14581    0.131967\n",
       "23     db_SQL_Server  11694.509873        9977    0.090298\n",
       "14        db_MongoDB  16451.658177        6282    0.056856\n",
       "9           db_Hbase  18531.896909        3214    0.029089\n",
       "3             db_DB2  12639.861919        2752    0.024907\n",
       "13      db_Memcached  15968.959878        2293    0.020753\n",
       "19     db_PostgreSQL  14929.289676        1863    0.016861\n",
       "5   db_Elasticsearch  18008.434764        1561    0.014128\n",
       "24         db_SQLite  12880.956532        1434    0.012979\n",
       "25           db_Solr  17568.148926         714    0.006462\n",
       "27         db_Sybase  12061.340206         582    0.005267\n",
       "10       db_Informix  10912.378641         206    0.001864\n",
       "28       db_Teradata  15723.744292         146    0.001321\n",
       "12        db_MariaDB  15254.545455         110    0.000996\n",
       "16          db_Neo4j  20033.088235         102    0.000923\n",
       "1       db_CouchBase  18709.459459          37    0.000335\n",
       "22       db_SAP_HANA  16116.666667          30    0.000272\n",
       "26         db_Splunk  17482.142857          28    0.000253\n",
       "4        db_DynamoDB  19197.916667          24    0.000217\n",
       "2         db_CouchDB  17904.761905          21    0.000190\n",
       "8        db_Firebird  11000.000000           7    0.000063\n",
       "7        db_Firebase  15166.666667           6    0.000054\n",
       "29          db_dBase  11583.333333           6    0.000054\n",
       "11         db_Ingres  21500.000000           5    0.000045\n",
       "17        db_Netezza  27300.000000           5    0.000045\n",
       "21           db_Riak  15625.000000           4    0.000036\n",
       "0     db_Apache_Hive           NaN           0    0.000000\n",
       "6       db_FileMaker           NaN           0    0.000000"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_db.sort_values(by='head_count', ascending=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "由此可见，MySQL（33%），Oracle（26%），Redis（13%），SQL Server（9%）占据了绝大多数市场份额。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Programming Languages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_pl=get_sub_stats(data,'pl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pl</th>\n",
       "      <th>salary</th>\n",
       "      <th>head_count</th>\n",
       "      <th>percentage</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>pl_java</td>\n",
       "      <td>14155.706303</td>\n",
       "      <td>53327</td>\n",
       "      <td>0.299106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>pl_cpp</td>\n",
       "      <td>15869.083763</td>\n",
       "      <td>29281</td>\n",
       "      <td>0.164234</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>pl_javascript</td>\n",
       "      <td>12274.682531</td>\n",
       "      <td>27483</td>\n",
       "      <td>0.154149</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pl_c_sharp</td>\n",
       "      <td>12008.116585</td>\n",
       "      <td>19954</td>\n",
       "      <td>0.111920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>pl_python</td>\n",
       "      <td>17751.090593</td>\n",
       "      <td>14946</td>\n",
       "      <td>0.083831</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>pl_go</td>\n",
       "      <td>17567.808789</td>\n",
       "      <td>12630</td>\n",
       "      <td>0.070840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>pl_php</td>\n",
       "      <td>13163.163089</td>\n",
       "      <td>9457</td>\n",
       "      <td>0.053043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>pl_matlab</td>\n",
       "      <td>17720.568562</td>\n",
       "      <td>2691</td>\n",
       "      <td>0.015094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>pl_lua</td>\n",
       "      <td>16210.063028</td>\n",
       "      <td>2327</td>\n",
       "      <td>0.013052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>pl_swift</td>\n",
       "      <td>14246.094609</td>\n",
       "      <td>1515</td>\n",
       "      <td>0.008497</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>pl_perl</td>\n",
       "      <td>17219.907072</td>\n",
       "      <td>1381</td>\n",
       "      <td>0.007746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>pl_typescript</td>\n",
       "      <td>14287.894201</td>\n",
       "      <td>983</td>\n",
       "      <td>0.005514</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>pl_ruby</td>\n",
       "      <td>16159.683426</td>\n",
       "      <td>716</td>\n",
       "      <td>0.004016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>pl_delphi</td>\n",
       "      <td>10740.599051</td>\n",
       "      <td>562</td>\n",
       "      <td>0.003152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>pl_kotlin</td>\n",
       "      <td>14866.808252</td>\n",
       "      <td>412</td>\n",
       "      <td>0.002311</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>pl_objective_c</td>\n",
       "      <td>13492.978395</td>\n",
       "      <td>216</td>\n",
       "      <td>0.001212</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>pl_vba</td>\n",
       "      <td>11461.029412</td>\n",
       "      <td>204</td>\n",
       "      <td>0.001144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>pl_rust</td>\n",
       "      <td>19343.881857</td>\n",
       "      <td>158</td>\n",
       "      <td>0.000886</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>pl_visual_basic</td>\n",
       "      <td>11033.950617</td>\n",
       "      <td>27</td>\n",
       "      <td>0.000151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>pl_haskell</td>\n",
       "      <td>24371.794872</td>\n",
       "      <td>13</td>\n",
       "      <td>0.000073</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>pl_julia</td>\n",
       "      <td>14383.333333</td>\n",
       "      <td>5</td>\n",
       "      <td>0.000028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>pl_scrala</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 pl        salary  head_count  percentage\n",
       "5           pl_java  14155.706303       53327    0.299106\n",
       "1            pl_cpp  15869.083763       29281    0.164234\n",
       "6     pl_javascript  12274.682531       27483    0.154149\n",
       "0        pl_c_sharp  12008.116585       19954    0.111920\n",
       "14        pl_python  17751.090593       14946    0.083831\n",
       "3             pl_go  17567.808789       12630    0.070840\n",
       "13           pl_php  13163.163089        9457    0.053043\n",
       "10        pl_matlab  17720.568562        2691    0.015094\n",
       "9            pl_lua  16210.063028        2327    0.013052\n",
       "18         pl_swift  14246.094609        1515    0.008497\n",
       "12          pl_perl  17219.907072        1381    0.007746\n",
       "19    pl_typescript  14287.894201         983    0.005514\n",
       "15          pl_ruby  16159.683426         716    0.004016\n",
       "2         pl_delphi  10740.599051         562    0.003152\n",
       "8         pl_kotlin  14866.808252         412    0.002311\n",
       "11   pl_objective_c  13492.978395         216    0.001212\n",
       "20           pl_vba  11461.029412         204    0.001144\n",
       "16          pl_rust  19343.881857         158    0.000886\n",
       "21  pl_visual_basic  11033.950617          27    0.000151\n",
       "4        pl_haskell  24371.794872          13    0.000073\n",
       "7          pl_julia  14383.333333           5    0.000028\n",
       "17        pl_scrala           NaN           0    0.000000"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_pl.sort_values(by='head_count', ascending=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "数据显示，java的市场份额是最大的，占了30%，约等于第二位和第三位（即C++和javascript）的总和。其他主流语言为c#，python，go和php。在这七大主流语言中，python和go的工资最高，达到了17000；javascript和c#的工资最低，只有12000。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pl</th>\n",
       "      <th>salary</th>\n",
       "      <th>head_count</th>\n",
       "      <th>percentage</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>pl_haskell</td>\n",
       "      <td>24371.794872</td>\n",
       "      <td>13</td>\n",
       "      <td>0.000073</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>pl_rust</td>\n",
       "      <td>19343.881857</td>\n",
       "      <td>158</td>\n",
       "      <td>0.000886</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>pl_python</td>\n",
       "      <td>17751.090593</td>\n",
       "      <td>14946</td>\n",
       "      <td>0.083831</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>pl_matlab</td>\n",
       "      <td>17720.568562</td>\n",
       "      <td>2691</td>\n",
       "      <td>0.015094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>pl_go</td>\n",
       "      <td>17567.808789</td>\n",
       "      <td>12630</td>\n",
       "      <td>0.070840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>pl_perl</td>\n",
       "      <td>17219.907072</td>\n",
       "      <td>1381</td>\n",
       "      <td>0.007746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>pl_lua</td>\n",
       "      <td>16210.063028</td>\n",
       "      <td>2327</td>\n",
       "      <td>0.013052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>pl_ruby</td>\n",
       "      <td>16159.683426</td>\n",
       "      <td>716</td>\n",
       "      <td>0.004016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>pl_cpp</td>\n",
       "      <td>15869.083763</td>\n",
       "      <td>29281</td>\n",
       "      <td>0.164234</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>pl_kotlin</td>\n",
       "      <td>14866.808252</td>\n",
       "      <td>412</td>\n",
       "      <td>0.002311</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>pl_julia</td>\n",
       "      <td>14383.333333</td>\n",
       "      <td>5</td>\n",
       "      <td>0.000028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>pl_typescript</td>\n",
       "      <td>14287.894201</td>\n",
       "      <td>983</td>\n",
       "      <td>0.005514</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>pl_swift</td>\n",
       "      <td>14246.094609</td>\n",
       "      <td>1515</td>\n",
       "      <td>0.008497</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>pl_java</td>\n",
       "      <td>14155.706303</td>\n",
       "      <td>53327</td>\n",
       "      <td>0.299106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>pl_objective_c</td>\n",
       "      <td>13492.978395</td>\n",
       "      <td>216</td>\n",
       "      <td>0.001212</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>pl_php</td>\n",
       "      <td>13163.163089</td>\n",
       "      <td>9457</td>\n",
       "      <td>0.053043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>pl_javascript</td>\n",
       "      <td>12274.682531</td>\n",
       "      <td>27483</td>\n",
       "      <td>0.154149</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pl_c_sharp</td>\n",
       "      <td>12008.116585</td>\n",
       "      <td>19954</td>\n",
       "      <td>0.111920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>pl_vba</td>\n",
       "      <td>11461.029412</td>\n",
       "      <td>204</td>\n",
       "      <td>0.001144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>pl_visual_basic</td>\n",
       "      <td>11033.950617</td>\n",
       "      <td>27</td>\n",
       "      <td>0.000151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>pl_delphi</td>\n",
       "      <td>10740.599051</td>\n",
       "      <td>562</td>\n",
       "      <td>0.003152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>pl_scrala</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 pl        salary  head_count  percentage\n",
       "4        pl_haskell  24371.794872          13    0.000073\n",
       "16          pl_rust  19343.881857         158    0.000886\n",
       "14        pl_python  17751.090593       14946    0.083831\n",
       "10        pl_matlab  17720.568562        2691    0.015094\n",
       "3             pl_go  17567.808789       12630    0.070840\n",
       "12          pl_perl  17219.907072        1381    0.007746\n",
       "9            pl_lua  16210.063028        2327    0.013052\n",
       "15          pl_ruby  16159.683426         716    0.004016\n",
       "1            pl_cpp  15869.083763       29281    0.164234\n",
       "8         pl_kotlin  14866.808252         412    0.002311\n",
       "7          pl_julia  14383.333333           5    0.000028\n",
       "19    pl_typescript  14287.894201         983    0.005514\n",
       "18         pl_swift  14246.094609        1515    0.008497\n",
       "5           pl_java  14155.706303       53327    0.299106\n",
       "11   pl_objective_c  13492.978395         216    0.001212\n",
       "13           pl_php  13163.163089        9457    0.053043\n",
       "6     pl_javascript  12274.682531       27483    0.154149\n",
       "0        pl_c_sharp  12008.116585       19954    0.111920\n",
       "20           pl_vba  11461.029412         204    0.001144\n",
       "21  pl_visual_basic  11033.950617          27    0.000151\n",
       "2         pl_delphi  10740.599051         562    0.003152\n",
       "17        pl_scrala           NaN           0    0.000000"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_pl"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "从工资上面看，haskell的工资遥遥领先，一个月2万4呀（不过样本只有13个职位，仅供参考）！我连忙查了一下什么是haskell：haskell是一门函数式（functional）语言。我大学里函数式语言考试（F#）还拿了100分呢（一半人都挂了），现在后悔没有搞函数式编程，不知道转行是不是来得及。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "company_size_50_150        40202.0\n",
       "company_size_150_500       30493.0\n",
       "company_size_50            22544.0\n",
       "company_size_1000_5000     16753.0\n",
       "company_size_500_1000      13408.0\n",
       "company_size_10000          5433.0\n",
       "company_size_5000_10000     2601.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_subset(data_sum, 'company_size_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "company_size_10000         19133.420976\n",
       "company_size_5000_10000    15321.567381\n",
       "company_size_1000_5000     15247.573198\n",
       "company_size_500_1000      14714.131745\n",
       "company_size_150_500       13620.251887\n",
       "company_size_50_150        12901.782978\n",
       "company_size_50            12288.195438\n",
       "dtype: float64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_mean_salary_by('company_size_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "company_type_private               88787.0\n",
       "company_type_listed                11800.0\n",
       "company_type_state                  7301.0\n",
       "company_type_foreign                5378.0\n",
       "company_type_us_eu                  5113.0\n",
       "company_type_startup                3079.0\n",
       "company_type_public_institution      824.0\n",
       "company_type_non_profit              133.0\n",
       "company_type_foreign_rep              98.0\n",
       "company_type_foreign_gov              18.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_subset(data_sum, 'company_type_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "company_type_us_eu                 16553.452133\n",
       "company_type_foreign_rep           16158.179012\n",
       "company_type_listed                14597.330645\n",
       "company_type_startup               14407.678775\n",
       "company_type_foreign               13904.301029\n",
       "company_type_state                 13818.917714\n",
       "company_type_private               13358.761498\n",
       "company_type_foreign_gov           13347.222222\n",
       "company_type_public_institution    13137.405866\n",
       "company_type_non_profit            10167.386091\n",
       "dtype: float64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_mean_salary_by('company_type_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_industry=get_subset(data_sum, 'industry_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "industry_computer      113090.0\n",
       "industry_trade          10821.0\n",
       "industry_edu             9779.0\n",
       "industry_finance         5296.0\n",
       "industry_medical         4092.0\n",
       "industry_gov             3330.0\n",
       "industry_energy          3016.0\n",
       "industry_realestate      2988.0\n",
       "industry_ads             2876.0\n",
       "industry_logistic        2164.0\n",
       "industry_service         1348.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_industry"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "industry_finance       15933.725683\n",
       "industry_edu           15065.823747\n",
       "industry_logistic      14341.192681\n",
       "industry_service       14236.452514\n",
       "industry_computer      13756.265933\n",
       "industry_trade         13681.545418\n",
       "industry_gov           13412.566477\n",
       "industry_medical       12949.943916\n",
       "industry_realestate    12855.498612\n",
       "industry_energy        12729.150934\n",
       "industry_ads           12514.656120\n",
       "dtype: float64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_mean_salary_by('industry_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "experience_1_3     40876.0\n",
       "experience_3_5     37925.0\n",
       "experience_no      36288.0\n",
       "experience_5_10    16370.0\n",
       "experience_10        459.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_subset(data_sum, 'experience_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "experience_10      29943.149718\n",
       "experience_5_10    20635.954227\n",
       "experience_3_5     15021.980430\n",
       "experience_no      11874.269926\n",
       "experience_1_3     11328.539647\n",
       "dtype: float64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_mean_salary_by('experience_')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 工资分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1eb11272048>"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAE7tJREFUeJzt3XusnHWdx/H3d1tBxMUW0bPdlmzL2rhbJK5wgqAbMxEXChhhE01KyFqUTRPF63bjliVZdr0k4mVVst4aQcGwAiK7EMAlBJlkN9FyEQUq1h5LF46woimg1Xg5+t0/5nfs9Pzm9MxMz5w5Y9+vZHKe5/f8nuf5zm9Oz2eey0wjM5Ekqd0fDLsASdLiYzhIkiqGgySpYjhIkiqGgySpYjhIkiqGgySpYjhIkiqGgySpsnTYBfTrmGOOydWrV/e0zs9+9jOOPPLIwRQ0z0apVrDeQbPewRmlWuHg6r3vvvt+nJkv6KpzZo7k46STTspe3XXXXT2vMyyjVGum9Q6a9Q7OKNWaeXD1Avdml39jPa0kSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaqM7NdnHIzVW24dyn53f/DsoexXknrlkYMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqXYVDRLw7IrZHxEMR8aWIeHZErImIbRGxMyKui4jDSt/Dy/xEWb66bTsXl/YdEXFGW/v60jYREVvm+0lKknozZzhExErgHcB4Zr4EWAJsAC4DPpaZa4GngAvLKhcCT2Xmi4CPlX5ExLqy3vHAeuBTEbEkIpYAnwTOBNYB55W+kqQh6fa00lLgiIhYCjwHeAJ4NXBDWX4VcG6ZPqfMU5afFhFR2q/NzF9m5iPABHByeUxk5q7M/BVwbekrSRqSOcMhM38AfAR4lFYoPAPcBzydmVOl2ySwskyvBB4r606V/s9vb5+xzmztkqQhWTpXh4hYTuud/BrgaeDLtE4BzZTTq8yybLb2TgGVHdqIiE3AJoCxsTGazeaBSq/s3buXZrPJ5hOm5u48AL3UO13rqLDewbLewRmlWmHh6p0zHIDXAI9k5o8AIuJG4BXAsohYWo4OVgGPl/6TwLHAZDkN9TxgT1v7tPZ1ZmvfT2ZuBbYCjI+PZ6PR6KL8fZrNJo1Ggwu23NrTevNl9/mNrvtO1zoqrHewrHdwRqlWWLh6u7nm8ChwSkQ8p1w7OA34DnAX8PrSZyNwU5m+ucxTln8tM7O0byh3M60B1gJ3A/cAa8vdT4fRumh988E/NUlSv+Y8csjMbRFxA/BNYAq4n9a791uBayPi/aXtirLKFcAXI2KC1hHDhrKd7RFxPa1gmQIuyszfAETE24Dbad0JdWVmbp+/pyhJ6lU3p5XIzEuBS2c076J1p9HMvr8A3jDLdj4AfKBD+23Abd3UIkkaPD8hLUmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqdBUOEbEsIm6IiO9GxMMRcWpEHB0Rd0TEzvJzeekbEXF5RExExAMRcWLbdjaW/jsjYmNb+0kR8WBZ5/KIiPl/qpKkbnV75PAJ4L8y88+AlwIPA1uAOzNzLXBnmQc4E1hbHpuATwNExNHApcDLgZOBS6cDpfTZ1Lbe+oN7WpKkgzFnOETEUcCrgCsAMvNXmfk0cA5wVel2FXBumT4HuDpbvgEsi4gVwBnAHZm5
JzOfAu4A1pdlR2Xm1zMzgavbtiVJGoJujhyOA34EfD4i7o+Iz0XEkcBYZj4BUH6+sPRfCTzWtv5kaTtQ+2SHdknSkCztss+JwNszc1tEfIJ9p5A66XS9IPtorzccsYnW6SfGxsZoNpsHKKO2d+9ems0mm0+Y6mm9+dJLvdO1jgrrHSzrHZxRqhUWrt5uwmESmMzMbWX+Blrh8MOIWJGZT5RTQ0+29T+2bf1VwOOlvTGjvVnaV3XoX8nMrcBWgPHx8Ww0Gp26zarZbNJoNLhgy609rTdfdp/f6LrvdK2jwnoHy3oHZ5RqhYWrd87TSpn5f8BjEfHi0nQa8B3gZmD6jqONwE1l+mbgjeWupVOAZ8ppp9uB0yNiebkQfTpwe1n204g4pdyl9Ma2bUmShqCbIweAtwPXRMRhwC7gTbSC5fqIuBB4FHhD6XsbcBYwAfy89CUz90TE+4B7Sr/3ZuaeMv0W4AvAEcBXy0OSNCRdhUNmfgsY77DotA59E7holu1cCVzZof1e4CXd1CJJGjw/IS1JqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqTK0mEXcChZveXWrvtuPmGKC3roP5fdHzx73rYl6fefRw6SpIrhIEmqdB0OEbEkIu6PiFvK/JqI2BYROyPiuog4rLQfXuYnyvLVbdu4uLTviIgz2trXl7aJiNgyf09PktSPXo4c3gk83DZ/GfCxzFwLPAVcWNovBJ7KzBcBHyv9iIh1wAbgeGA98KkSOEuATwJnAuuA80pfSdKQdBUOEbEKOBv4XJkP4NXADaXLVcC5ZfqcMk9Zflrpfw5wbWb+MjMfASaAk8tjIjN3ZeavgGtLX0nSkHR75PBx4D3Ab8v884GnM3OqzE8CK8v0SuAxgLL8mdL/d+0z1pmtXZI0JHPeyhoRrwWezMz7IqIx3dyha86xbLb2TgGVHdqIiE3AJoCxsTGazebshXewd+9ems0mm0+YmrvzkI0dwbzW2etY9Wp6bEeF9Q7WKNU7SrXCwtXbzeccXgm8LiLOAp4NHEXrSGJZRCwtRwergMdL/0ngWGAyIpYCzwP2tLVPa19ntvb9ZOZWYCvA+Ph4NhqNLsrfp9ls0mg05vXzA4Oy+YQpPvrg/H0MZff5jXnbVifTYzsqrHewRqneUaoVFq7eOU8rZebFmbkqM1fTuqD8tcw8H7gLeH3pthG4qUzfXOYpy7+WmVnaN5S7mdYAa4G7gXuAteXup8PKPm6el2cnSerLwbw1/Qfg2oh4P3A/cEVpvwL4YkRM0Dpi2ACQmdsj4nrgO8AUcFFm/gYgIt4G3A4sAa7MzO0HUZck6SD1FA6Z2QSaZXoXrTuNZvb5BfCGWdb/APCBDu23Abf1UoskaXD8hLQkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqTJnOETEsRFxV0Q8HBHbI+Kdpf3oiLgjInaWn8tLe0TE5RExEREPRMSJbdvaWPrvjIiNbe0nRcSDZZ3LIyIG8WQlSd3p5shhCticmX8OnAJcFBHrgC3AnZm5FrizzAOcCawtj03Ap6EVJsClwMuBk4FLpwOl9NnUtt76g39qkqR+zRkOmflEZn6zTP8UeBhYCZwDXFW6XQWcW6bPAa7Olm8AyyJiBXAGcEdm7snMp4A7
gPVl2VGZ+fXMTODqtm1Jkoagp2sOEbEaeBmwDRjLzCegFSDAC0u3lcBjbatNlrYDtU92aJckDcnSbjtGxHOBrwDvysyfHOCyQKcF2Ud7pxo20Tr9xNjYGM1mc46q97d3716azSabT5jqab1hGDuCea2z17Hq1fTYjgrrHaxRqneUaoWFq7ercIiIZ9EKhmsy88bS/MOIWJGZT5RTQ0+W9kng2LbVVwGPl/bGjPZmaV/VoX8lM7cCWwHGx8ez0Wh06jarZrNJo9Hggi239rTeMGw+YYqPPth1ds9p9/mNedtWJ9NjOyqsd7BGqd5RqhUWrt5u7lYK4Arg4cz817ZFNwPTdxxtBG5qa39juWvpFOCZctrpduD0iFheLkSfDtxelv00Ik4p+3pj27YkSUPQzVvTVwJ/AzwYEd8qbf8IfBC4PiIuBB4F3lCW3QacBUwAPwfeBJCZeyLifcA9pd97M3NPmX4L8AXgCOCr5SFJGpI5wyEz/4fO1wUATuvQP4GLZtnWlcCVHdrvBV4yVy2SpIXhJ6QlSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUWTrsArQwVm+5daDb33zCFBd02MfuD5490P1KGgyPHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklRZNOEQEesjYkdETETElmHXI0mHskURDhGxBPgkcCawDjgvItYNtypJOnQtlq/POBmYyMxdABFxLXAO8J2hVqWDNuiv7TgQv7pD6t9iCYeVwGNt85PAy4dUi35PHCiYZvsuqPlgKOn3wWIJh+jQllWniE3ApjK7NyJ29LifY4Af97jOULxjhGoF620Xlw1iq6M1voxWvaNUKxxcvX/SbcfFEg6TwLFt86uAx2d2ysytwNZ+dxIR92bmeL/rL6RRqhWsd9Csd3BGqVZYuHoXxQVp4B5gbUSsiYjDgA3AzUOuSZIOWYviyCEzpyLibcDtwBLgyszcPuSyJOmQtSjCASAzbwNuG/Bu+j4lNQSjVCtY76BZ7+CMUq2wQPVGZnXdV5J0iFss1xwkSYvIIREOw/pqjog4NiLuioiHI2J7RLyztB8dEXdExM7yc3lpj4i4vNT5QESc2LatjaX/zojY2NZ+UkQ8WNa5PCI63Rbca91LIuL+iLilzK+JiG1l39eVmwaIiMPL/ERZvrptGxeX9h0RcUZb+7y+FhGxLCJuiIjvlnE+dTGPb0S8u/wuPBQRX4qIZy+m8Y2IKyPiyYh4qK1t4OM52z76rPfD5ffhgYj4j4hY1u+49fPa9FJr27K/j4iMiGMWy9iSmb/XD1oXuL8PHAccBnwbWLdA+14BnFim/xD4Hq2vB/kQsKW0bwEuK9NnAV+l9bmPU4Btpf1oYFf5ubxMLy/L7gZOLet8FThzHur+O+DfgVvK/PXAhjL9GeAtZfqtwGfK9AbgujK9rozz4cCaMv5LBvFaAFcBf1umDwOWLdbxpfVhz0eAI9rG9YLFNL7Aq4ATgYfa2gY+nrPto896TweWlunL2urtedx6fW16rbW0H0vrZpz/BY5ZNGN7sH9IFvujDNbtbfMXAxcPqZabgL8CdgArStsKYEeZ/ixwXlv/HWX5ecBn29o/W9pWAN9ta9+vX581rgLuBF4N3FJ+0X7c9o/td+NZfqFPLdNLS7+YOcbT/eb7tQCOovXHNma0L8rxZd83ARxdxusW4IzFNr7Aavb/Yzvw8ZxtH/3UO2PZXwPXdBqPucatn9/9fmoFbgBeCuxmXzgMfWwPhdNKnb6aY+VCF1EOO18GbAPGMvMJgPLzhaXbbLUeqH2yQ/vB+DjwHuC3Zf75wNOZOdVhH7+rqyx/pvTv9Xn06zjgR8Dno3Ua7HMR
cSSLdHwz8wfAR4BHgSdojdd9LN7xnbYQ4znbPg7Wm2m9i+6n3n5+93sSEa8DfpCZ356xaOhjeyiEQ1dfzTHQAiKeC3wFeFdm/uRAXTu0ZR/tfYmI1wJPZuZ9XdR0oGULUi+td2wnAp/OzJcBP6N12DybYY/vclpfKLkG+GPgSFrfRDzbPoY9vnNZ1PVFxCXAFHDNdFOPdfXzu99Lfc8BLgH+qdPiHmua97E9FMKhq6/mGJSIeBatYLgmM28szT+MiBVl+QrgyTlqPVD7qg7t/Xol8LqI2A1cS+vU0seBZREx/ZmY9n38rq6y/HnAnj6eR78mgcnM3Fbmb6AVFot1fF8DPJKZP8rMXwM3Aq9g8Y7vtIUYz9n20Zdyofa1wPlZzqf0Ue+P6f216cWf0nqj8O3yb24V8M2I+KM+ap3/se33/OmoPGi9u9xVXoTpi03HL9C+A7ga+PiM9g+z/wWiD5Xps9n/ItTdpf1oWufWl5fHI8DRZdk9pe/0Raiz5qn2BvsuSH+Z/S/KvbVMX8T+F+WuL9PHs/+Fv120LvrN+2sB/Dfw4jL9z2VsF+X40vqm4e3Ac8r2rgLevtjGl/qaw8DHc7Z99Fnvelpf9/+CGf16HrdeX5tea52xbDf7rjkMfWwH/gdyMTxoXfn/Hq07Ei5ZwP3+Ja1DuweAb5XHWbTOTd4J7Cw/p1/coPWfHn0feBAYb9vWm4GJ8nhTW/s48FBZ59/o4qJYl7U32BcOx9G6E2Ki/GM5vLQ/u8xPlOXHta1/SalpB213+Mz3awH8BXBvGeP/LP9gFu34Av8CfLds84u0/lAtmvEFvkTresivab0bvXAhxnO2ffRZ7wSt8/LT/+Y+0++49fPa9FLrjOW72RcOQx9bPyEtSaocCtccJEk9MhwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSZX/B5wFI512+HkAAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "data.monthly_salary.hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1eb739bec18>"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFkdJREFUeJzt3X+MXfV55/H3U5sfFg7YhGRk2WhNttZuSNxSPEu8YlsNEIH5oZo/wooIFZOlspSSVdp6Vcx2u7QJSE5XLCnalNYbvJhsW0PTRljYLGs5jLLVJvwKBOOw1BNjJQ4IK2tDcZKSneTZP+53wmW+985cz1zPPdjvl3Q15zzne8997rm+85nz415HZiJJUrtfGHQDkqTmMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUmT/oBmbqnHPOyeXLl3dc9sMf/pAzzjhjbhvqUZN7g2b31+TeoNn9Nbk3aHZ/J1JvzzzzzA8y8309Dc7Md+Vt1apV2c3jjz/eddmgNbm3zGb31+TeMpvdX5N7y2x2fydSb8DT2ePvWA8rSZIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqPX19RkQcAN4EfgqMZ+ZwRJwNPAgsBw4A/zozj0REAH8CXAX8CLgpM79Z1rMO+A9ltXdk5tZSXwXcDywAdgKfLp/mU58s37ijp3EbVo5zU49je3Fg09V9W5ekuXMsew6XZOYFmTlc5jcCuzNzBbC7zANcCawot/XAvQAlTG4HPgJcBNweEYvLfe4tYyfut2bGz0iSNGuzOay0FthaprcC17bVHyhf5fENYFFELAGuAHZl5uHMPALsAtaUZWdm5tfL3sIDbeuSJA1A9HL0JiJeBo4ACfx5Zm6OiNczc1HbmCOZuTgiHgE2Zebflfpu4FZgBDg9M+8o9T8AfgyMlvEfLfVfBW7NzGs69LGe1h4GQ0NDq7Zt29ax36NHj7Jw4cLetsAcG1Rve77/Rk/jhhbAaz/u3+OuXHpW39bV5NcVmt1fk3uDZvd3IvV2ySWXPNN29GdKvX5l98WZ+UpEvB/YFRH/Z4qx0aGWM6jXxczNwGaA4eHhHBkZ6djA6Ogo3ZYN2qB66/U8woaV49y1p3/f5H7ghpG+ravJrys0u78m9wbN7u9k7a2nw0qZ+Ur5eQj4Cq1zBq+VQ0KUn4fK8IPAuW13Xwa8Mk19WYe6JGlApg2HiDgjIt4zMQ1cDrwAbAfWlWHrgIfL9HbgxmhZDbyRma8CjwGXR8TiciL6cuCxsuzNiFhdrnS6sW1dkqQB6OX4wRDwldbvbeYDf5mZ/yMingIeioibge8C15XxO2ldxjpG61LWTwBk5uGI+CzwVBn3mcw8XKY/yduXsj5abpKkAZk2HDJzP/DLHer/F7isQz2BW7qsawuwpUP9aeDDPfQrSZoDfkJaklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklTpORwiYl5EPBsRj5T58yLiiYjYFxEPRsSppX5amR8ry5e3reO2Un8pIq5oq68ptbGI2Ni/pydJmolj2XP4NPBi2/zngLszcwVwBLi51G8GjmTmLwJ3l3FExPnA9cCHgDXAn5bAmQd8AbgSOB/4eBkrSRqQnsIhIpYBVwNfLPMBXAp8uQzZClxbpteWecryy8r4tcC2zHwrM18GxoCLym0sM/dn5k+AbWWsJGlAet1z+Dzwe8DPyvx7gdczc7zMHwSWlumlwPcAyvI3yvif1yfdp1tdkjQg86cbEBHXAIcy85mIGJkodxia0yzrVu8UUNmh
RkSsB9YDDA0NMTo62rHno0ePdl02aIPqbcPK8ekHAUMLeh/bi34+1ya/rtDs/prcGzS7v5O1t2nDAbgY+PWIuAo4HTiT1p7EooiYX/YOlgGvlPEHgXOBgxExHzgLONxWn9B+n271d8jMzcBmgOHh4RwZGenY8OjoKN2WDdqgertp446exm1YOc5de3r5Z9GbAzeM9G1dTX5dodn9Nbk3aHZ/J2tv0x5WyszbMnNZZi6ndUL5q5l5A/A48LEybB3wcJneXuYpy7+amVnq15ermc4DVgBPAk8BK8rVT6eWx9jel2cnSZqR2fyJeCuwLSLuAJ4F7iv1+4AvRcQYrT2G6wEyc29EPAR8GxgHbsnMnwJExKeAx4B5wJbM3DuLviRJs3RM4ZCZo8Bomd5P60qjyWP+Ebiuy/3vBO7sUN8J7DyWXiRJx4+fkJYkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVJl2nCIiNMj4smI+FZE7I2IPyr18yLiiYjYFxEPRsSppX5amR8ry5e3reu2Un8pIq5oq68ptbGI2Nj/pylJOha97Dm8BVyamb8MXACsiYjVwOeAuzNzBXAEuLmMvxk4kpm/CNxdxhER5wPXAx8C1gB/GhHzImIe8AXgSuB84ONlrCRpQKYNh2w5WmZPKbcELgW+XOpbgWvL9NoyT1l+WUREqW/LzLcy82VgDLio3MYyc39m/gTYVsZKkgakp3MO5S/854BDwC7gO8DrmTlehhwElpbppcD3AMryN4D3ttcn3adbXZI0IPN7GZSZPwUuiIhFwFeAD3YaVn5Gl2Xd6p0CKjvUiIj1wHqAoaEhRkdHO/Z79OjRrssGbVC9bVg5Pv0gYGhB72N70c/n2uTXFZrdX5N7g2b3d7L21lM4TMjM1yNiFFgNLIqI+WXvYBnwShl2EDgXOBgR84GzgMNt9Qnt9+lWn/z4m4HNAMPDwzkyMtKxz9HRUbotG7RB9XbTxh09jduwcpy79hzTP4spHbhhpG/ravLrCs3ur8m9QbP7O1l76+VqpfeVPQYiYgHwUeBF4HHgY2XYOuDhMr29zFOWfzUzs9SvL1cznQesAJ4EngJWlKufTqV10np7P56cJGlmevkTcQmwtVxV9AvAQ5n5SER8G9gWEXcAzwL3lfH3AV+KiDFaewzXA2Tm3oh4CPg2MA7cUg5XERGfAh4D5gFbMnNv356hJOmYTRsOmfk88Csd6vtpXWk0uf6PwHVd1nUncGeH+k5gZw/9SpLmgJ+QliRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUqV/H4XVtJZv3MGGleM9f1pZkgbFPQdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUmXacIiIcyPi8Yh4MSL2RsSnS/3siNgVEfvKz8WlHhFxT0SMRcTzEXFh27rWlfH7ImJdW31VROwp97knIuJ4PFlJUm962XMYBzZk5geB1cAtEXE+sBHYnZkrgN1lHuBKYEW5rQfuhVaYALcDHwEuAm6fCJQyZn3b/dbM/qlJkmZq2nDIzFcz85tl+k3gRWApsBbYWoZtBa4t02uBB7LlG8CiiFgCXAHsyszDmXkE2AWsKcvOzMyvZ2YCD7StS5I0ANH6fdzj4IjlwNeADwPfzcxFbcuOZObiiHgE2JSZf1fqu4FbgRHg9My8o9T/APgxMFrGf7TUfxW4NTOv6fD462ntYTA0NLRq27ZtHfs8evQoCxcu7Pl5zZU9
33+DoQXw2o8H3Ul3/e5v5dKz+raupr6uE5rcX5N7g2b3dyL1dskllzyTmcO9jJ3f60ojYiHwN8BvZ+Y/THFaoNOCnEG9LmZuBjYDDA8P58jISMcGRkdH6bZskG7auIMNK8e5a0/Pm33O9bu/AzeM9G1dTX1dJzS5vyb3Bs3u72TtraffAhFxCq1g+IvM/NtSfi0ilmTmq+XQ0KFSPwic23b3ZcArpT4yqT5a6ss6jJdmZfnGHXP6eBtWjnPTxh0c2HT1nD6udDz0crVSAPcBL2bmf25btB2YuOJoHfBwW/3GctXSauCNzHwVeAy4PCIWlxPRlwOPlWVvRsTq8lg3tq1LkjQAvew5XAz8BrAnIp4rtX8PbAIeioibge8C15VlO4GrgDHgR8AnADLzcER8FniqjPtMZh4u058E7gcWAI+WmyRpQKYNh3JiudsJhss6jE/gli7r2gJs6VB/mtZJbklSA/gJaUlSxXCQJFUMB0lSxXCQJFUMB0lSpbkf1dUJoZ8fRJv4kJmk4889B0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSZdpwiIgtEXEoIl5oq50dEbsiYl/5ubjUIyLuiYixiHg+Ii5su8+6Mn5fRKxrq6+KiD3lPvdERPT7SUqSjk0vew73A2sm1TYCuzNzBbC7zANcCawot/XAvdAKE+B24CPARcDtE4FSxqxvu9/kx5IkzbFpwyEzvwYcnlReC2wt01uBa9vqD2TLN4BFEbEEuALYlZmHM/MIsAtYU5admZlfz8wEHmhblyRpQGZ6zmEoM18FKD/fX+pLge+1jTtYalPVD3aoS5IGaH6f19fpfEHOoN555RHraR2CYmhoiNHR0Y7jjh492nXZIG1YOc7QgtbPpmpyf03uDd7ur4n/9pr6npjQ5P5O1t5mGg6vRcSSzHy1HBo6VOoHgXPbxi0DXin1kUn10VJf1mF8R5m5GdgMMDw8nCMjIx3HjY6O0m3ZIN20cQcbVo5z155+Z3L/NLm/JvcGb/d34IaRQbdSaep7YkKT+ztZe5vpYaXtwMQVR+uAh9vqN5arllYDb5TDTo8Bl0fE4nIi+nLgsbLszYhYXa5SurFtXZKkAZn2z7CI+Ctaf/WfExEHaV11tAl4KCJuBr4LXFeG7wSuAsaAHwGfAMjMwxHxWeCpMu4zmTlxkvuTtK6IWgA8Wm6SpAGaNhwy8+NdFl3WYWwCt3RZzxZgS4f608CHp+tDkjR3/IS0JKliOEiSKoaDJKnS3OsCpXep5Rt3DORxD2y6eiCPqxOTew6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpMpJ+d1Kg/ruG0l6t3DPQZJUOSn3HKQT0VR7xBtWjnPTcdxj9hthTzzuOUiSKoaDJKliOEiSKoaDJKliOEiSKoaDJKliOEiSKn7OQdKszfZbB2b6OQw/X3H8uOcgSaoYDpKkiuEgSap4zkHSu9ZcfMNyt/MhJ/r5DvccJEkVw0GSVGnMYaWIWAP8CTAP+GJmbhpwS5LU1aD+07C5OpzViD2HiJgHfAG4Ejgf+HhEnD/YriTp5NWIcAAuAsYyc39m/gTYBqwdcE+SdNJqSjgsBb7XNn+w1CRJAxCZOegeiIjrgCsy8zfL/G8AF2Xmv500bj2wvsz+M+ClLqs8B/jBcWp3tprcGzS7vyb3Bs3ur8m9QbP7O5F6+yeZ+b5eBjblhPRB4Ny2+WXAK5MHZeZmYPN0K4uIpzNzuH/t9U+Te4Nm99fk3qDZ/TW5N2h2fydrb005rPQUsCIizouIU4Hrge0D7kmSTlqN2HPIzPGI+BTwGK1LWbdk5t4BtyVJJ61GhANAZu4EdvZpddMeehqgJvcG
ze6vyb1Bs/trcm/Q7P5Oyt4acUJaktQsTTnnIElqkBMqHCJiTUS8FBFjEbFxjh/7QETsiYjnIuLpUjs7InZFxL7yc3GpR0TcU/p8PiIubFvPujJ+X0Ssm2EvWyLiUES80FbrWy8Rsao817Fy3+hDf38YEd8v2++5iLiqbdlt5bFeiogr2uodX+9yYcMTpe8Hy0UOvfZ2bkQ8HhEvRsTeiPh0U7bfFL01ZdudHhFPRsS3Sn9/NNU6I+K0Mj9Wli+fad+z6O3+iHi5bdtdUOqDeF/Mi4hnI+KRRmy3zDwhbrROZH8H+ABwKvAt4Pw5fPwDwDmTan8MbCzTG4HPlemrgEeBAFYDT5T62cD+8nNxmV48g15+DbgQeOF49AI8CfzLcp9HgSv70N8fAv+uw9jzy2t5GnBeeY3nTfV6Aw8B15fpPwM+eQy9LQEuLNPvAf6+9DDw7TdFb03ZdgEsLNOnAE+UbdJxncBvAX9Wpq8HHpxp37Po7X7gYx3GD+J98bvAXwKPTPVazNV2O5H2HJr4FRxrga1leitwbVv9gWz5BrAoIpYAVwC7MvNwZh4BdgFrjvVBM/NrwOHj0UtZdmZmfj1b/yIfaFvXbPrrZi2wLTPfysyXgTFar3XH17v8tXYp8OUOz7WX3l7NzG+W6TeBF2l9Wn/g22+K3rqZ622XmXm0zJ5SbjnFOtu36ZeBy0oPx9T3LHvrZk7fFxGxDLga+GKZn+q1mJPtdiKFw6C/giOB/xkRz0Trk9wAQ5n5KrTe2MD7S71br8fzOfSrl6Vl+nj0+KmyC78lymGbGfT3XuD1zByfbX9ld/1XaP2V2ajtN6k3aMi2K4dGngMO0frF+Z0p1vnzPsryN0oPx+X9Mbm3zJzYdneWbXd3RJw2ubcee5jt6/p54PeAn5X5qV6LOdluJ1I4dDq+N5eXYl2cmRfS+mbZWyLi16YY263XQTyHY+3lePV4L/BPgQuAV4G7BtlfRCwE/gb47cz8h6mGznV/HXprzLbLzJ9m5gW0vuXgIuCDU6xzTvub3FtEfBi4DfjnwL+gdajo1rnuLSKuAQ5l5jPt5SnWNye9nUjh0NNXcBwvmflK+XkI+AqtN8ZrZXeT8vPQNL0ez+fQr14Olum+9piZr5U378+A/0pr+82kvx/QOgQwf1K9ZxFxCq1fvn+RmX9byo3Yfp16a9K2m5CZrwOjtI7Xd1vnz/soy8+idbjxuL4/2npbUw7VZWa+Bfw3Zr7tZvO6Xgz8ekQcoHXI51JaexKD3W7TnZR4t9xofaBvP60TMRMnXT40R499BvCetun/TetcwX/inScx/7hMX807T3Y9mW+f7HqZ1omuxWX67Bn2tJx3nvDtWy+0vu5kNW+feLuqD/0taZv+HVrHTgE+xDtPsu2ndYKt6+sN/DXvPJH3W8fQV9A6Xvz5SfWBb78pemvKtnsfsKhMLwD+F3BNt3UCt/DOE6sPzbTvWfS2pG3bfh7YNOD3xQhvn5Ae6HY77r845/JG6wqDv6d1nPP35/BxP1A2+LeAvROPTes44G5gX/k58Y8oaP3nRt8B9gDDbev6N7ROJI0Bn5hhP39F6/DC/6P1V8PN/ewFGAZeKPf5L5QPU86yvy+Vx3+e1vdqtf/C+/3yWC/RdgVIt9e7vB5Plr7/GjjtGHr7V7R2uZ8Hniu3q5qw/aborSnb7peAZ0sfLwD/cap1AqeX+bGy/AMz7XsWvX21bLsXgP/O21c0zfn7oqxjhLfDYaDbzU9IS5IqJ9I5B0lSnxgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqTK/wciBVVS/D/g4wAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "data[data.monthly_salary<40000].monthly_salary.hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1eb74a79780>"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFPdJREFUeJzt3X+s3XV9x/HnWyqoMGkLcte1zQqzYeoaEW6wzMRcxCmgsSwRgyHSYc1dJjM6u7gyk20mS4Y6ppIZtBG1GLUyJqMB1JHKycYfoFaxgsB6xQrXdlTkhzslbhbf++N8ak8vt9xz7j33nN7PeT6Sk/P9fr6f7/l+3ud7ed0vn++5p5GZSJLq9bxBD0CSNL8MekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlFg16AAAnn3xyrlq1quv99u/fz/HHH9/7AR3FrHk4WPPwmEvdO3bseCwzXzJjx8x8zgdwOnBP2+MXwPuApcDtwK7yvKT0D+AaYALYCZw50zHOOuusnI077rhjVvstZNY8HKx5eMylbuA7OUO+ZubMUzeZ+WBmnpGZZwBnAU8DNwGbgO2ZuRrYXtYBLgBWl8c4cO3Mv5ckSfOl2zn684AfZeZPgHXAltK+BbioLK8Dri+/cO4CFkfEsp6MVpLUtW6D/hLgy2V5JDP3ApTnU0r7cuCRtn0mS5skaQAiO/ya4og4FtgDvCIzH42IJzNzcdv2JzJzSUTcCvxDZt5Z2rcDH8jMHVNeb5zW1A4jIyNnbd26tevBN5tNTjjhhK73W8iseThY8/CYS93nnnvujswcnalfN5+6uQD4bmY+WtYfjYhlmbm3TM3sK+2TwMq2/VbQ+gVxmMzcDGwGGB0dzbGxsS6G0tJoNJjNfguZNQ8Hax4e/ai7m6mbt3No2gZgG7C+LK8Hbm5rvyxa1gJPHZzikST1X0dX9BHxIuCPgD9ta74KuCEiNgAPAxeX9tuAC2l9vPJp4PKejVaS1LWOgj4znwZOmtL2c1qfwpnaN4ErejI6SdKc+RUIklS5o+IrEOZi1aZbB3bs3Ve9aWDHlqROeUUvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVa6joI+IxRFxY0Q8EBH3R8Q5EbE0Im6PiF3leUnpGxFxTURMRMTOiDhzfkuQJD2XTq/oPwF8PTN/H3glcD+wCdiemauB7WUd4AJgdXmMA9f2dMSSpK7MGPQR8WLgtcB1AJn5f5n5JLAO2FK6bQEuKsvrgOuz5S5gcUQs6/nIJUkd6eSK/jTgZ8DnIuJ7EfGZiDgeGMnMvQDl+ZTSfznwSNv+k6VNkjQAizrscybwnsy8OyI+waFpmunENG35rE4R47SmdhgZGaHRaHQwlMM1m002rnmm6/16ZTZjnqtmszmQ4w6SNQ+HYawZ+lN3J0E/CUxm5t1l/UZaQf9oRCzLzL1lamZfW/+VbfuvAPZMfdHM3AxsBhgdHc2xsbGuB99oNLj6zv1d79cruy8d6/sxG40Gs3mvFjJrHg7DWDP0p+4Zp24y87+BRyLi9NJ0HvBDYBuwvrStB24uy9uAy8qnb9YCTx2c4pEk9V8nV/QA7wG+GBHHAg8Bl9P6JXFDRGwAHgYuLn1vAy4EJoCnS19J0oB0FPSZeQ8wOs2m86bpm8AVcxyXJKlH/MtYSaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUuY6CPiJ2R8QPIuKeiPhOaVsaEbdHxK7yvKS0R0RcExETEbEzIs6czwIkSc+tmyv6czPz
jMwcLeubgO2ZuRrYXtYBLgBWl8c4cG2vBitJ6t5cpm7WAVvK8hbgorb267PlLmBxRCybw3EkSXPQadAn8O8RsSMixkvbSGbuBSjPp5T25cAjbftOljZJ0gAs6rDfazJzT0ScAtweEQ88R9+Ypi2f1an1C2McYGRkhEaj0eFQDmk2m2xc80zX+/XKbMY8V81mcyDHHSRrHg7DWDP0p+6Ogj4z95TnfRFxE3A28GhELMvMvWVqZl/pPgmsbNt9BbBnmtfcDGwGGB0dzbGxsa4H32g0uPrO/V3v1yu7Lx3r+zEbjQazea8WMmseDsNYM/Sn7hmnbiLi+Ij4rYPLwBuAe4FtwPrSbT1wc1neBlxWPn2zFnjq4BSPJKn/OrmiHwFuioiD/b+UmV+PiG8DN0TEBuBh4OLS/zbgQmACeBq4vOejliR1bMagz8yHgFdO0/5z4Lxp2hO4oiejkyTNmX8ZK0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klS5joM+Io6JiO9FxC1l/dSIuDsidkXEVyLi2NJ+XFmfKNtXzc/QJUmd6OaK/r3A/W3rHwY+lpmrgSeADaV9A/BEZr4U+FjpJ0kakI6CPiJWAG8CPlPWA3gdcGPpsgW4qCyvK+uU7eeV/pKkAej0iv7jwAeAX5f1k4AnM/NAWZ8Elpfl5cAjAGX7U6W/JGkAFs3UISLeDOzLzB0RMXaweZqu2cG29tcdB8YBRkZGaDQanYz3MM1mk41rnul6v16ZzZjnqtlsDuS4g2TNw2EYa4b+1D1j0AOvAd4SERcCLwBeTOsKf3FELCpX7SuAPaX/JLASmIyIRcCJwONTXzQzNwObAUZHR3NsbKzrwTcaDa6+c3/X+/XK7kvH+n7MRqPBbN6rhcyah8Mw1gz9qXvGqZvMvDIzV2TmKuAS4JuZeSlwB/DW0m09cHNZ3lbWKdu/mZnPuqKXJPXHXD5H/1fA+yNigtYc/HWl/TrgpNL+fmDT3IYoSZqLTqZufiMzG0CjLD8EnD1Nn18CF/dgbJKkHvAvYyWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuVmDPqIeEFEfCsivh8R90XEh0r7qRFxd0TsioivRMSxpf24sj5Rtq+a3xIkSc+lkyv6/wVel5mvBM4Azo+ItcCHgY9l5mrgCWBD6b8BeCIzXwp8rPSTJA3IjEGfLc2y+vzySOB1wI2lfQtwUVleV9Yp28+LiOjZiCVJXYnMnLlTxDHADuClwCeBjwJ3lat2ImIl8LXM/IOIuBc4PzMny7YfAa/OzMemvOY4MA4wMjJy1tatW7sefLPZ5MdPPdP1fr2yZvmJfT9ms9nkhBNO6PtxB8mah8Mw1gxzq/vcc8/dkZmjM/Vb1MmLZeYzwBkRsRi4CXjZdN3K83RX78/6bZKZm4HNAKOjozk2NtbJUA7TaDS4+s79Xe/XK7svHev7MRuNBrN5rxYyax4Ow1gz9Kfurj51k5lPAg1gLbA4Ig7+olgB7CnLk8BKgLL9RODxXgxWktS9Tj5185JyJU9EvBB4PXA/cAfw1tJtPXBzWd5W1inbv5mdzA9JkuZFJ1M3y4AtZZ7+ecANmXlLRPwQ2BoRfw98D7iu9L8O+EJETNC6kr9kHsYtSerQjEGfmTuBV03T/hBw9jTtvwQu7snoJElz5l/GSlLlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMp19BUImt6qTbf2/Zgb1xxgrO9HlbSQeUUvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCX
pMoZ9JJUOYNekio3Y9BHxMqIuCMi7o+I+yLivaV9aUTcHhG7yvOS0h4RcU1ETETEzog4c76LkCQdWSdX9AeAjZn5MmAtcEVEvBzYBGzPzNXA9rIOcAGwujzGgWt7PmpJUsdmDPrM3JuZ3y3L/wPcDywH1gFbSrctwEVleR1wfbbcBSyOiGU9H7kkqSNdzdFHxCrgVcDdwEhm7oXWLwPglNJtOfBI226TpU2SNAAd/1OCEXEC8K/A+zLzFxFxxK7TtOU0rzdOa2qHkZERGo1Gp0P5jWazycY1z3S930I28kJm9V4tZM1m05qHwDDWDP2pu6Ogj4jn0wr5L2bmV0vzoxGxLDP3lqmZfaV9EljZtvsKYM/U18zMzcBmgNHR0RwbG+t68I1Gg6vv3N/1fgvZxjUHeNss3quFrNFoMJufj4XMmodHP+ru5FM3AVwH3J+Z/9S2aRuwviyvB25ua7+sfPpmLfDUwSkeSVL/dXJF/xrgHcAPIuKe0vbXwFXADRGxAXgYuLhsuw24EJgAngYu7+mIJUldmTHoM/NOpp93Bzhvmv4JXDHHcUmSesS/jJWkyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZWbMegj4rMRsS8i7m1rWxoRt0fErvK8pLRHRFwTERMRsTMizpzPwUuSZtbJFf3ngfOntG0CtmfmamB7WQe4AFhdHuPAtb0ZpiRptmYM+sz8D+DxKc3rgC1leQtwUVv79dlyF7A4Ipb1arCSpO4tmuV+I5m5FyAz90bEKaV9OfBIW7/J0rZ36gtExDitq35GRkZoNBpdD6LZbLJxzTNd77eQjbyQWb1XC1mz2bTmITCMNUN/6p5t0B9JTNOW03XMzM3AZoDR0dEcGxvr+mCNRoOr79zf9X4L2cY1B3jbLN6rhazRaDCbn4+FzJqHRz/qnu2nbh49OCVTnveV9klgZVu/FcCe2Q9PkjRXsw36bcD6srweuLmt/bLy6Zu1wFMHp3gkSYMx49RNRHwZGANOjohJ4G+Bq4AbImID8DBwcel+G3AhMAE8DVw+D2OWJHVhxqDPzLcfYdN50/RN4Iq5DkqS1Du9vhmrPli16daBHXv3VW8a2LElzY5fgSBJlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1Ll/D56dWUQ34W/cc0Bxvp+VKkeBr0WhEH9Yyv+QyuqgVM3klQ5g16SKmfQS1Ll5iXoI+L8iHgwIiYiYtN8HEOS1Jme34yNiGOATwJ/BEwC346IbZn5w14fS5pvg7oJPKyfNPKm+/yYj0/dnA1MZOZDABGxFVgHGPTSAjDIX26D+iDgoGoG+Pz5x8/7MebjXV0OPNK2Pgm8eh6OI1VtkOGjukRm9vYFIy4G3piZ7yrr7wDOzsz3TOk3DoyX1dOBB2dxuJOBx+Yw3IXImoeDNQ+PudT9u5n5kpk6zccV/SSwsm19BbBnaqfM3AxsnsuBIuI7mTk6l9dYaKx5OFjz8OhH3fPxqZtvA6sj4tSIOBa4BNg2D8eRJHWg51f0mXkgIv4c+AZwDPDZzLyv18eRJHVmXm5xZ+ZtwG3z8dpTzGnqZ4Gy5uFgzcNj3uvu+c1YSdLRxa9AkKTKHTVBHxHHRMT3IuKWsn5qRNwdEbsi4ivlxi4RcVxZnyjbV7W9xpWl/cGIeGNb+1H3lQwRsTgiboyIByLi/og4JyKWRsTtpebbI2JJ6RsRcU0Z/86IOLPtddaX/rsiYn1b+1kR8YOyzzUREYOos11E/EVE3BcR90bElyPiBTWe54j4bETsi4h729rm/dwe6RgDrPmj5ed7Z0TcFBGL27Z1dQ5n83My36aruW3bX0ZERsTJZX2w5zkz
j4oH8H7gS8AtZf0G4JKy/Cngz8ryu4FPleVLgK+U5ZcD3weOA04FfkTrZvAxZfk04NjS5+VHQb1bgHeV5WOBxcBHgE2lbRPw4bJ8IfA1IIC1wN2lfSnwUHleUpaXlG3fAs4p+3wNuGDA9S4Hfgy8sO38/kmN5xl4LXAmcG9b27yf2yMdY4A1vwFYVJY/3FZz1+ew25+TQdVc2lfS+jDKT4CTj4bzPLD/8Ke8MSuA7cDrgFtKYY+1/ZCcA3yjLH8DOKcsLyr9ArgSuLLtNb9R9vvNvqX9sH4DqvfFtEIvprQ/CCwry8uAB8vyp4G3T+0HvB34dFv7p0vbMuCBtvbD+g2o5oN/Mb20nLdbgDfWep6BVRweevN+bo90jEHVPGXbHwNfnO7czHQOZ5MHg6wZuBF4JbCbQ0E/0PN8tEzdfBz4APDrsn4S8GRmHijrk7SCAtq+YqFsf6r0n+6rF5Y/R/sgnQb8DPhctKarPhMRxwMjmbkXoDyfUvp3W9vysjy1fWAy86fAPwIPA3tpnbcd1H2e2/Xj3B7pGEeDd9K6KoXua55NHgxERLwF+Glmfn/KpoGe54EHfUS8GdiXmTvam6fpmjNs67Z9kBbR+l++azPzVcB+Wv8LdiQLvuYyj7iO1v+q/w5wPHDBNF1rOs+dqL7OiPggcAD44sGmabrNtuaj5v2IiBcBHwT+ZrrN07T17TwPPOiB1wBviYjdwFZa0zcfBxZHxMHP+bd/jcJvvmKhbD8ReJwjf/VCR1/J0GeTwGRm3l3Wb6QV/I9GxDKA8ryvrX83tU2W5antg/R64MeZ+bPM/BXwVeAPqfs8t+vHuT3SMQam3Fx8M3BplrkGuq/5Mbr/ORmE36N1IfP9kmcrgO9GxG8z6PPczzm8Dua7xjh0M/ZfOPzmy7vL8hUcfvPlhrL8Cg6/wfMQrZs7i8ryqRy6wfOKo6DW/wROL8t/B3y0PNpvsnykLL+Jw2/kfKu0L6U117+kPH4MLC3bvl36HryRc+GA6301cB/wojKmLcB7aj3PPHuOft7P7ZGOMcCaz6f19eQvmdKv63PY7c/JoGqesm03h+boB3qeB/Yf/hHemDEOBf1ptO46T5STfFxpf0FZnyjbT2vb/4O07to/SNunTGjd8f6vsu2Dg66zjOkM4DvATuDfykk+idZN6V3l+eAJD1r/mMuPgB8Ao22v887yXkwAl7e1jwL3ln3+mT7eoHqOmj8EPFDG9YXyH3p15xn4Mq37EL+idWW2oR/n9kjHGGDNE7Tmn+8pj0/N9hzO5udkEDVP2b6bQ0E/0PPsX8ZKUuWOhjl6SdI8MuglqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6Sarc/wND/ZkXtt3hxQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "data[data.monthly_salary>40000].monthly_salary.hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1000.0"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.monthly_salary.min()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "140000.0"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.monthly_salary.max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12500.0"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.monthly_salary.median()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "13755.868728773368"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.monthly_salary.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy import stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "NormaltestResult(statistic=80485.89395778668, pvalue=0.0)"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stats.normaltest(data.monthly_salary)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "?stats.normaltest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "a=[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,11,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "NormaltestResult(statistic=112.24649953711291, pvalue=4.226514069652413e-25)"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stats.normaltest(a)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "null hypothesis: x comes from a normal distribution\n",
    "p=0\n",
    "The null hypothesis can be rejected\n",
    "conclusion: data is not normally distributed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
